main 56735b70df9c cached
471 files
33.7 MB
1.5M tokens
1056 symbols
1 requests
Download .txt
Showing preview only (6,131K chars total). Download the full file or copy to clipboard to get everything.
Repository: Azure-Samples/azure-search-openai-demo
Branch: main
Commit: 56735b70df9c
Files: 471
Total size: 33.7 MB

Directory structure:
gitextract_llud8udi/

├── .azdo/
│   └── pipelines/
│       └── azure-dev.yml
├── .devcontainer/
│   └── devcontainer.json
├── .gitattributes
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── ISSUE_TEMPLATE.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── agents/
│   │   ├── fixer.agent.md
│   │   └── triager.agent.md
│   ├── dependabot.yaml
│   ├── instructions/
│   │   └── bicep.instructions.md
│   ├── prompts/
│   │   └── review_pr_comments.prompt.md
│   ├── skills/
│   │   └── github-pr-inline-reply/
│   │       └── SKILL.md
│   └── workflows/
│       ├── azure-dev-validation.yaml
│       ├── azure-dev.yml
│       ├── evaluate.yaml
│       ├── frontend.yaml
│       ├── lint-markdown.yml
│       ├── nightly-jobs.yaml
│       ├── python-test.yaml
│       ├── stale-bot.yml
│       └── validate-markdown.yml
├── .gitignore
├── .markdownlint-cli2.jsonc
├── .pre-commit-config.yaml
├── .vscode/
│   ├── extensions.json
│   ├── launch.json
│   ├── settings.json
│   └── tasks.json
├── AGENTS.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── app/
│   ├── backend/
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── app.py
│   │   ├── approaches/
│   │   │   ├── __init__.py
│   │   │   ├── approach.py
│   │   │   ├── chatreadretrieveread.py
│   │   │   ├── promptmanager.py
│   │   │   └── prompts/
│   │   │       ├── chat_answer.system.jinja2
│   │   │       ├── chat_answer.user.jinja2
│   │   │       ├── chat_query_rewrite_tools.json
│   │   │       └── query_rewrite.system.jinja2
│   │   ├── chat_history/
│   │   │   ├── __init__.py
│   │   │   └── cosmosdb.py
│   │   ├── config.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── authentication.py
│   │   │   └── sessionhelper.py
│   │   ├── custom_uvicorn_worker.py
│   │   ├── decorators.py
│   │   ├── error.py
│   │   ├── gunicorn.conf.py
│   │   ├── load_azd_env.py
│   │   ├── main.py
│   │   ├── prepdocs.py
│   │   ├── prepdocslib/
│   │   │   ├── __init__.py
│   │   │   ├── blobmanager.py
│   │   │   ├── cloudingestionstrategy.py
│   │   │   ├── csvparser.py
│   │   │   ├── embeddings.py
│   │   │   ├── figureprocessor.py
│   │   │   ├── fileprocessor.py
│   │   │   ├── filestrategy.py
│   │   │   ├── htmlparser.py
│   │   │   ├── integratedvectorizerstrategy.py
│   │   │   ├── jsonparser.py
│   │   │   ├── listfilestrategy.py
│   │   │   ├── mediadescriber.py
│   │   │   ├── page.py
│   │   │   ├── parser.py
│   │   │   ├── pdfparser.py
│   │   │   ├── searchmanager.py
│   │   │   ├── servicesetup.py
│   │   │   ├── strategy.py
│   │   │   ├── textparser.py
│   │   │   ├── textprocessor.py
│   │   │   └── textsplitter.py
│   │   ├── requirements.in
│   │   ├── requirements.txt
│   │   └── setup_cloud_ingestion.py
│   ├── frontend/
│   │   ├── .npmrc
│   │   ├── .nvmrc
│   │   ├── .prettierignore
│   │   ├── .prettierrc.json
│   │   ├── index.html
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── api/
│   │   │   │   ├── api.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── models.ts
│   │   │   ├── authConfig.ts
│   │   │   ├── components/
│   │   │   │   ├── AnalysisPanel/
│   │   │   │   │   ├── AgentPlan.tsx
│   │   │   │   │   ├── AnalysisPanel.module.css
│   │   │   │   │   ├── AnalysisPanel.tsx
│   │   │   │   │   ├── AnalysisPanelTabs.tsx
│   │   │   │   │   ├── ThoughtProcess.tsx
│   │   │   │   │   ├── TokenUsageGraph.tsx
│   │   │   │   │   ├── agentPlanUtils.ts
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── Answer/
│   │   │   │   │   ├── Answer.module.css
│   │   │   │   │   ├── Answer.tsx
│   │   │   │   │   ├── AnswerError.tsx
│   │   │   │   │   ├── AnswerIcon.tsx
│   │   │   │   │   ├── AnswerLoading.tsx
│   │   │   │   │   ├── AnswerParser.tsx
│   │   │   │   │   ├── SpeechOutputAzure.tsx
│   │   │   │   │   ├── SpeechOutputBrowser.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── ClearChatButton/
│   │   │   │   │   ├── ClearChatButton.module.css
│   │   │   │   │   ├── ClearChatButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── Example/
│   │   │   │   │   ├── Example.module.css
│   │   │   │   │   ├── Example.tsx
│   │   │   │   │   ├── ExampleList.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HelpCallout/
│   │   │   │   │   ├── HelpCallout.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── HistoryButton/
│   │   │   │   │   ├── HistoryButton.module.css
│   │   │   │   │   ├── HistoryButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryItem/
│   │   │   │   │   ├── HistoryItem.module.css
│   │   │   │   │   ├── HistoryItem.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryPanel/
│   │   │   │   │   ├── HistoryPanel.module.css
│   │   │   │   │   ├── HistoryPanel.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryProviders/
│   │   │   │   │   ├── CosmosDB.ts
│   │   │   │   │   ├── HistoryManager.ts
│   │   │   │   │   ├── IProvider.ts
│   │   │   │   │   ├── IndexedDB.ts
│   │   │   │   │   ├── None.ts
│   │   │   │   │   └── index.ts
│   │   │   │   ├── LoginButton/
│   │   │   │   │   ├── LoginButton.module.css
│   │   │   │   │   ├── LoginButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── MarkdownViewer/
│   │   │   │   │   ├── MarkdownViewer.module.css
│   │   │   │   │   ├── MarkdownViewer.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── QuestionInput/
│   │   │   │   │   ├── QuestionInput.module.css
│   │   │   │   │   ├── QuestionInput.tsx
│   │   │   │   │   ├── SpeechInput.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── Settings/
│   │   │   │   │   ├── Settings.module.css
│   │   │   │   │   └── Settings.tsx
│   │   │   │   ├── SettingsButton/
│   │   │   │   │   ├── SettingsButton.module.css
│   │   │   │   │   ├── SettingsButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── SupportingContent/
│   │   │   │   │   ├── SupportingContent.module.css
│   │   │   │   │   ├── SupportingContent.tsx
│   │   │   │   │   ├── SupportingContentParser.ts
│   │   │   │   │   └── index.ts
│   │   │   │   ├── TokenClaimsDisplay/
│   │   │   │   │   ├── TokenClaimsDisplay.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── UploadFile/
│   │   │   │   │   ├── UploadFile.module.css
│   │   │   │   │   ├── UploadFile.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── UserChatMessage/
│   │   │   │   │   ├── UserChatMessage.module.css
│   │   │   │   │   ├── UserChatMessage.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   └── VectorSettings/
│   │   │   │       ├── VectorSettings.module.css
│   │   │   │       ├── VectorSettings.tsx
│   │   │   │       └── index.ts
│   │   │   ├── i18n/
│   │   │   │   ├── LanguagePicker.module.css
│   │   │   │   ├── LanguagePicker.tsx
│   │   │   │   ├── config.ts
│   │   │   │   └── index.tsx
│   │   │   ├── index.css
│   │   │   ├── index.tsx
│   │   │   ├── layoutWrapper.tsx
│   │   │   ├── locales/
│   │   │   │   ├── da/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── en/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── es/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── fr/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── it/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── ja/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── nl/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── pl/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── ptBR/
│   │   │   │   │   └── translation.json
│   │   │   │   └── tr/
│   │   │   │       └── translation.json
│   │   │   ├── loginContext.tsx
│   │   │   ├── pages/
│   │   │   │   ├── NoPage.tsx
│   │   │   │   ├── chat/
│   │   │   │   │   ├── Chat.module.css
│   │   │   │   │   └── Chat.tsx
│   │   │   │   └── layout/
│   │   │   │       ├── Layout.module.css
│   │   │   │       └── Layout.tsx
│   │   │   └── vite-env.d.ts
│   │   ├── tsconfig.json
│   │   └── vite.config.ts
│   ├── functions/
│   │   ├── __init__.py
│   │   ├── document_extractor/
│   │   │   ├── .funcignore
│   │   │   ├── function_app.py
│   │   │   └── host.json
│   │   ├── figure_processor/
│   │   │   ├── .funcignore
│   │   │   ├── function_app.py
│   │   │   └── host.json
│   │   └── text_processor/
│   │       ├── .funcignore
│   │       ├── function_app.py
│   │       └── host.json
│   ├── start.ps1
│   └── start.sh
├── azure.yaml
├── data/
│   ├── Json_Examples/
│   │   ├── 2189.json
│   │   ├── 2190.json
│   │   ├── 2191.json
│   │   ├── 2192.json
│   │   └── query.json
│   └── Zava_Company_Overview.md
├── docs/
│   ├── README.md
│   ├── agentic_retrieval.md
│   ├── appservice.md
│   ├── architecture.md
│   ├── azd.md
│   ├── azure_app_service.md
│   ├── azure_container_apps.md
│   ├── customization.md
│   ├── data_ingestion.md
│   ├── deploy_existing.md
│   ├── deploy_features.md
│   ├── deploy_freetrial.md
│   ├── deploy_lowcost.md
│   ├── deploy_private.md
│   ├── deploy_troubleshooting.md
│   ├── evaluation.md
│   ├── http_protocol.md
│   ├── localdev.md
│   ├── login_and_acl.md
│   ├── monitoring.md
│   ├── multimodal.md
│   ├── other_samples.md
│   ├── productionizing.md
│   ├── reasoning.md
│   ├── safety_evaluation.md
│   ├── sharing_environments.md
│   └── textsplitter.md
├── evals/
│   ├── evaluate.py
│   ├── evaluate_config.json
│   ├── evaluate_config_multimodal.json
│   ├── generate_ground_truth.py
│   ├── ground_truth.jsonl
│   ├── ground_truth_kg.json
│   ├── ground_truth_multimodal.jsonl
│   ├── requirements.txt
│   ├── results/
│   │   ├── baseline/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt35turbo-ada002/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-ada002/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-emb3l/
│   │   │   ├── README.md
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-emb3l-2/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5chat-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5mini-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5mini-emb3l-2/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   └── o3mini-ada002/
│   │       ├── config.json
│   │       ├── eval_results.jsonl
│   │       ├── evaluate_parameters.json
│   │       └── summary.json
│   ├── results_multimodal/
│   │   ├── baseline/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── no-image-embeddings/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   └── no-image-sources/
│   │       ├── config.json
│   │       ├── eval_results.jsonl
│   │       ├── evaluate_parameters.json
│   │       └── summary.json
│   ├── safety_evaluation.py
│   └── safety_results.json
├── infra/
│   ├── abbreviations.json
│   ├── app/
│   │   ├── functions-app.bicep
│   │   ├── functions-rbac.bicep
│   │   ├── functions.bicep
│   │   └── storage-containers.bicep
│   ├── backend-dashboard.bicep
│   ├── bicepconfig.json
│   ├── core/
│   │   ├── ai/
│   │   │   ├── ai-environment.bicep
│   │   │   ├── hub.bicep
│   │   │   └── project.bicep
│   │   ├── auth/
│   │   │   └── appregistration.bicep
│   │   ├── host/
│   │   │   ├── appservice-appsettings.bicep
│   │   │   ├── appservice.bicep
│   │   │   ├── appserviceplan.bicep
│   │   │   ├── container-app-upsert.bicep
│   │   │   ├── container-app.bicep
│   │   │   ├── container-apps-auth.bicep
│   │   │   ├── container-apps-environment.bicep
│   │   │   ├── container-apps.bicep
│   │   │   └── container-registry.bicep
│   │   ├── monitor/
│   │   │   └── monitoring.bicep
│   │   ├── networking/
│   │   │   ├── private-dns-zones.bicep
│   │   │   ├── private-endpoint.bicep
│   │   │   └── vnet.bicep
│   │   ├── search/
│   │   │   ├── search-diagnostics.bicep
│   │   │   └── search-services.bicep
│   │   ├── security/
│   │   │   ├── aca-identity.bicep
│   │   │   ├── documentdb-sql-role.bicep
│   │   │   ├── registry-access.bicep
│   │   │   ├── role.bicep
│   │   │   └── storage-role.bicep
│   │   └── storage/
│   │       └── storage-account.bicep
│   ├── main.bicep
│   ├── main.parameters.json
│   ├── main.test.bicep
│   ├── network-isolation.bicep
│   └── private-endpoints.bicep
├── locustfile.py
├── ps-rule.yaml
├── pyproject.toml
├── requirements-dev.txt
├── scripts/
│   ├── adlsgen2setup.py
│   ├── auth_common.py
│   ├── auth_init.ps1
│   ├── auth_init.py
│   ├── auth_init.sh
│   ├── auth_update.ps1
│   ├── auth_update.py
│   ├── auth_update.sh
│   ├── copy_prepdocslib.py
│   ├── cosmosdb_migration.py
│   ├── load-balance-aca-setup.sh
│   ├── load_azd_env.py
│   ├── load_python_env.ps1
│   ├── load_python_env.sh
│   ├── manageacl.py
│   ├── prepdocs.ps1
│   ├── prepdocs.sh
│   ├── roles.ps1
│   ├── roles.sh
│   ├── sampleacls.json
│   ├── setup_cloud_ingestion.ps1
│   ├── setup_cloud_ingestion.sh
│   └── verify_search_index_acls.py
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── e2e.py
    ├── mocks.py
    ├── snapshots/
    │   ├── test_app/
    │   │   ├── test_chat_followup/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception_contentsafety/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception_contentsafety_streaming/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_handle_exception_streaming/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_hybrid/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_hybrid_semantic_captions/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_hybrid_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_prompt_template/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_prompt_template_concat/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_seed/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_session_state_persists/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_stream_followup/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_handle_exception/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_stream_session_state_persists/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text_filter/
    │   │   │   └── auth_client0/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text_reasoning/
    │   │   │   ├── reasoning_client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── reasoning_client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_vision/
    │   │   │   └── client0/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_text/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_agent/
    │   │   │   ├── knowledgebase_client0/
    │   │   │   │   └── result.json
    │   │   │   ├── knowledgebase_client1_web/
    │   │   │   │   └── result.json
    │   │   │   └── knowledgebase_client2_sharepoint/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter/
    │   │   │   └── auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter_agent/
    │   │   │   └── knowledgebase_auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter_public_documents/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_reasoning/
    │   │   │   ├── reasoning_client0/
    │   │   │   │   └── result.json
    │   │   │   └── reasoning_client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semanticcaptions/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semanticranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vector/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vector_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vision/
    │   │   │   ├── client0/
    │   │   │   │   ├── result.json
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_vision_user/
    │   │   │   └── auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_vision_vectors/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   └── test_chat_with_history/
    │   │       ├── client0/
    │   │       │   └── result.json
    │   │       └── client1/
    │   │           └── result.json
    │   ├── test_authenticationhelper/
    │   │   ├── test_auth_setup/
    │   │   │   └── result.json
    │   │   ├── test_auth_setup_required_access_control/
    │   │   │   └── result.json
    │   │   └── test_auth_setup_required_access_control_and_unauthenticated_access/
    │   │       └── result.json
    │   ├── test_cosmosdb/
    │   │   ├── test_chathistory_getitem/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   ├── test_chathistory_query/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   └── test_chathistory_query_continuation/
    │   │       └── auth_public_documents_client0/
    │   │           └── result.json
    │   └── test_prepdocslib_textsplitter/
    │       ├── test_pages_with_figures/
    │       │   ├── pages_with_figures.json/
    │       │   │   └── split_pages_with_figures.json
    │       │   └── pages_with_just_text.json/
    │       │       └── split_pages_with_figures.json
    │       └── test_sentencetextsplitter_list_parse_and_split/
    │           └── text_splitter_sections.txt
    ├── test-data/
    │   ├── Simple Figure_content.txt
    │   ├── Simple Table_content.txt
    │   ├── pages_with_figures.json
    │   └── pages_with_just_text.json
    ├── test_adlsgen2setup.py
    ├── test_agentic_retrieval.py
    ├── test_app.py
    ├── test_app_config.py
    ├── test_auth_init.py
    ├── test_authenticationhelper.py
    ├── test_blob_manager.py
    ├── test_chatapproach.py
    ├── test_content_file.py
    ├── test_cosmosdb.py
    ├── test_cosmosdb_migration.py
    ├── test_csvparser.py
    ├── test_function_apps.py
    ├── test_htmlparser.py
    ├── test_jsonparser.py
    ├── test_listfilestrategy.py
    ├── test_manageacl.py
    ├── test_mediadescriber.py
    ├── test_pdfparser.py
    ├── test_prepdocs.py
    ├── test_prepdocslib_filestrategy.py
    ├── test_prepdocslib_textsplitter.py
    ├── test_searchmanager.py
    ├── test_sentencetextsplitter.py
    ├── test_servicesetup.py
    ├── test_textparser.py
    ├── test_textprocessor.py
    └── test_upload.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .azdo/pipelines/azure-dev.yml
================================================
# Run when commits are pushed to the mainline branch (main or master)
# Set this to the mainline branch you are using
trigger:
  - main
  - master

# Azure Pipelines workflow to deploy to Azure using azd
# To configure the required secrets and service connection for connecting to Azure, run `azd pipeline config --provider azdo`
# The "Install azd" task requires the setup-azd extension to be installed in your Azure DevOps organization - https://marketplace.visualstudio.com/items?itemName=ms-azuretools.azd
# See below for an alternative task that installs azd if you can't install the extension in your organization

pool:
  vmImage: ubuntu-latest

steps:
  - task: setup-azd@0
    displayName: Install azd

  # If you can't install the above task in your organization, comment it out and uncomment the task below to install azd
  # - task: Bash@3
  #   displayName: Install azd
  #   inputs:
  #     targetType: 'inline'
  #     script: |
  #       curl -fsSL https://aka.ms/install-azd.sh | bash

  # Make azd delegate authentication to the az CLI so it uses the service connection of the AzureCLI@2 tasks below
  - pwsh: |
      azd config set auth.useAzCliAuth "true"
    displayName: Configure AZD to Use AZ CLI Authentication.

  - task: AzureCLI@2
    displayName: Provision Infrastructure
    inputs:
      # azconnection is the service connection created by azd. You can change it to any service connection you have in your organization.
      azureSubscription: azconnection
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        azd provision --no-prompt
    env:
      AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID)
      AZURE_ENV_NAME: $(AZURE_ENV_NAME)
      AZURE_LOCATION: $(AZURE_LOCATION)
      AZD_INITIAL_ENVIRONMENT_CONFIG: $(AZD_INITIAL_ENVIRONMENT_CONFIG)
      AZURE_OPENAI_SERVICE: $(AZURE_OPENAI_SERVICE)
      AZURE_OPENAI_LOCATION: $(AZURE_OPENAI_LOCATION)
      AZURE_OPENAI_RESOURCE_GROUP: $(AZURE_OPENAI_RESOURCE_GROUP)
      AZURE_DOCUMENTINTELLIGENCE_SERVICE: $(AZURE_DOCUMENTINTELLIGENCE_SERVICE)
      AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: $(AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP)
      AZURE_DOCUMENTINTELLIGENCE_SKU: $(AZURE_DOCUMENTINTELLIGENCE_SKU)
      AZURE_DOCUMENTINTELLIGENCE_LOCATION: $(AZURE_DOCUMENTINTELLIGENCE_LOCATION)
      AZURE_SEARCH_INDEX: $(AZURE_SEARCH_INDEX)
      AZURE_SEARCH_SERVICE: $(AZURE_SEARCH_SERVICE)
      AZURE_SEARCH_SERVICE_RESOURCE_GROUP: $(AZURE_SEARCH_SERVICE_RESOURCE_GROUP)
      AZURE_SEARCH_SERVICE_LOCATION: $(AZURE_SEARCH_SERVICE_LOCATION)
      AZURE_SEARCH_SERVICE_SKU: $(AZURE_SEARCH_SERVICE_SKU)
      AZURE_SEARCH_QUERY_LANGUAGE: $(AZURE_SEARCH_QUERY_LANGUAGE)
      AZURE_SEARCH_QUERY_SPELLER: $(AZURE_SEARCH_QUERY_SPELLER)
      AZURE_SEARCH_SEMANTIC_RANKER: $(AZURE_SEARCH_SEMANTIC_RANKER)
      AZURE_SEARCH_QUERY_REWRITING: $(AZURE_SEARCH_QUERY_REWRITING)
      AZURE_SEARCH_FIELD_NAME_EMBEDDING: $(AZURE_SEARCH_FIELD_NAME_EMBEDDING)
      AZURE_STORAGE_ACCOUNT: $(AZURE_STORAGE_ACCOUNT)
      AZURE_STORAGE_RESOURCE_GROUP: $(AZURE_STORAGE_RESOURCE_GROUP)
      AZURE_STORAGE_SKU: $(AZURE_STORAGE_SKU)
      AZURE_APP_SERVICE_SKU: $(AZURE_APP_SERVICE_SKU)
      AZURE_OPENAI_CHATGPT_MODEL: $(AZURE_OPENAI_CHATGPT_MODEL)
      AZURE_OPENAI_CHATGPT_DEPLOYMENT: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT)
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY)
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION)
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU)
      AZURE_OPENAI_REASONING_EFFORT: $(AZURE_OPENAI_REASONING_EFFORT)
      AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT: $(AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT)
      AZURE_OPENAI_EMB_MODEL_NAME: $(AZURE_OPENAI_EMB_MODEL_NAME)
      AZURE_OPENAI_EMB_DEPLOYMENT: $(AZURE_OPENAI_EMB_DEPLOYMENT)
      AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY)
      AZURE_OPENAI_EMB_DEPLOYMENT_VERSION: $(AZURE_OPENAI_EMB_DEPLOYMENT_VERSION)
      AZURE_OPENAI_EMB_DEPLOYMENT_SKU: $(AZURE_OPENAI_EMB_DEPLOYMENT_SKU)
      AZURE_OPENAI_EMB_DIMENSIONS: $(AZURE_OPENAI_EMB_DIMENSIONS)
      AZURE_OPENAI_DISABLE_KEYS: $(AZURE_OPENAI_DISABLE_KEYS)
      OPENAI_HOST: $(OPENAI_HOST)
      OPENAI_API_KEY: $(OPENAI_API_KEY)
      OPENAI_ORGANIZATION: $(OPENAI_ORGANIZATION)
      AZURE_USE_APPLICATION_INSIGHTS: $(AZURE_USE_APPLICATION_INSIGHTS)
      AZURE_APPLICATION_INSIGHTS: $(AZURE_APPLICATION_INSIGHTS)
      AZURE_APPLICATION_INSIGHTS_DASHBOARD: $(AZURE_APPLICATION_INSIGHTS_DASHBOARD)
      AZURE_LOG_ANALYTICS: $(AZURE_LOG_ANALYTICS)
      USE_VECTORS: $(USE_VECTORS)
      USE_MULTIMODAL: $(USE_MULTIMODAL)
      USE_CLOUD_INGESTION: $(USE_CLOUD_INGESTION)
      USE_CLOUD_INGESTION_ACLS: $(USE_CLOUD_INGESTION_ACLS)
      USE_EXISTING_ADLS_STORAGE: $(USE_EXISTING_ADLS_STORAGE)
      AZURE_ADLS_GEN2_STORAGE_ACCOUNT: $(AZURE_ADLS_GEN2_STORAGE_ACCOUNT)
      AZURE_ADLS_GEN2_STORAGE_RESOURCE_GROUP: $(AZURE_ADLS_GEN2_STORAGE_RESOURCE_GROUP)
      AZURE_VISION_ENDPOINT: $(AZURE_VISION_ENDPOINT)
      VISION_SECRET_NAME: $(VISION_SECRET_NAME)
      AZURE_VISION_SERVICE: $(AZURE_VISION_SERVICE)
      AZURE_VISION_RESOURCE_GROUP: $(AZURE_VISION_RESOURCE_GROUP)
      AZURE_VISION_LOCATION: $(AZURE_VISION_LOCATION)
      AZURE_VISION_SKU: $(AZURE_VISION_SKU)
      ENABLE_LANGUAGE_PICKER: $(ENABLE_LANGUAGE_PICKER)
      USE_SPEECH_INPUT_BROWSER: $(USE_SPEECH_INPUT_BROWSER)
      USE_SPEECH_OUTPUT_BROWSER: $(USE_SPEECH_OUTPUT_BROWSER)
      USE_SPEECH_OUTPUT_AZURE: $(USE_SPEECH_OUTPUT_AZURE)
      AZURE_SPEECH_SERVICE: $(AZURE_SPEECH_SERVICE)
      AZURE_SPEECH_SERVICE_RESOURCE_GROUP: $(AZURE_SPEECH_SERVICE_RESOURCE_GROUP)
      AZURE_SPEECH_SERVICE_LOCATION: $(AZURE_SPEECH_SERVICE_LOCATION)
      AZURE_SPEECH_SERVICE_SKU: $(AZURE_SPEECH_SERVICE_SKU)
      AZURE_SPEECH_SERVICE_VOICE: $(AZURE_SPEECH_SERVICE_VOICE)
      AZURE_KEY_VAULT_NAME: $(AZURE_KEY_VAULT_NAME)
      AZURE_USE_AUTHENTICATION: $(AZURE_USE_AUTHENTICATION)
      AZURE_ENFORCE_ACCESS_CONTROL: $(AZURE_ENFORCE_ACCESS_CONTROL)
      AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: $(AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS)
      AZURE_ENABLE_UNAUTHENTICATED_ACCESS: $(AZURE_ENABLE_UNAUTHENTICATED_ACCESS)
      AZURE_TENANT_ID: $(AZURE_TENANT_ID)
      AZURE_AUTH_TENANT_ID: $(AZURE_AUTH_TENANT_ID)
      AZURE_SERVER_APP_ID: $(AZURE_SERVER_APP_ID)
      AZURE_CLIENT_APP_ID: $(AZURE_CLIENT_APP_ID)
      ALLOWED_ORIGIN: $(ALLOWED_ORIGIN)
      AZURE_SERVER_APP_SECRET: $(AZURE_SERVER_APP_SECRET)
      AZURE_CLIENT_APP_SECRET: $(AZURE_CLIENT_APP_SECRET)
      AZURE_ADLS_GEN2_FILESYSTEM: $(AZURE_ADLS_GEN2_FILESYSTEM)
      DEPLOYMENT_TARGET: $(DEPLOYMENT_TARGET)
      AZURE_CONTAINER_APPS_WORKLOAD_PROFILE: $(AZURE_CONTAINER_APPS_WORKLOAD_PROFILE)
      USE_CHAT_HISTORY_BROWSER: $(USE_CHAT_HISTORY_BROWSER)
      USE_MEDIA_DESCRIBER_AZURE_CU: $(USE_MEDIA_DESCRIBER_AZURE_CU)
      RAG_SEARCH_TEXT_EMBEDDINGS: $(RAG_SEARCH_TEXT_EMBEDDINGS)
      RAG_SEARCH_IMAGE_EMBEDDINGS: $(RAG_SEARCH_IMAGE_EMBEDDINGS)
      RAG_SEND_TEXT_SOURCES: $(RAG_SEND_TEXT_SOURCES)
      RAG_SEND_IMAGE_SOURCES: $(RAG_SEND_IMAGE_SOURCES)
      USE_AGENTIC_KNOWLEDGEBASE: $(USE_AGENTIC_KNOWLEDGEBASE)
      USE_WEB_SOURCE: $(USE_WEB_SOURCE)
      USE_SHAREPOINT_SOURCE: $(USE_SHAREPOINT_SOURCE)
  - task: AzureCLI@2
    displayName: Deploy Application
    inputs:
      azureSubscription: azconnection
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        azd deploy --no-prompt


================================================
FILE: .devcontainer/devcontainer.json
================================================
{
    "name": "Azure Search OpenAI Demo",
    "image": "mcr.microsoft.com/devcontainers/python:3.13-bookworm",
    "features": {
        "ghcr.io/devcontainers/features/node:1": {
            // This should match the version of Node.js in GitHub Actions workflows
            "version": "22",
            "nodeGypDependencies": false
        },
        "ghcr.io/devcontainers/features/azure-cli:1.2.5": {},
        "ghcr.io/devcontainers/features/docker-in-docker:2": {},
        "ghcr.io/azure/azure-dev/azd:latest": {}
    },
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-azuretools.azure-dev",
                "ms-azuretools.vscode-bicep",
                "ms-python.python",
                "astral-sh.ty",
                "esbenp.prettier-vscode",
                "DavidAnson.vscode-markdownlint"
            ]
        }
    },
    "forwardPorts": [
        50505
    ],
    "postCreateCommand": "",
    "remoteUser": "vscode",
    "hostRequirements": {
        "memory": "8gb"
    }
}


================================================
FILE: .gitattributes
================================================
*.sh text eol=lf
*.jsonlines text eol=lf


================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns


================================================
FILE: .github/ISSUE_TEMPLATE.md
================================================
<!--
IF SUFFICIENT INFORMATION IS NOT PROVIDED VIA THE FOLLOWING TEMPLATE THE ISSUE MIGHT BE CLOSED WITHOUT FURTHER CONSIDERATION OR INVESTIGATION
-->
> Please provide us with the following information:
> ---------------------------------------------------------------

### This issue is for a: (mark with an `x`)
```
- [ ] bug report -> please search issues before submitting
- [ ] feature request
- [ ] documentation issue or request
- [ ] regression (a behavior that used to work and stopped in a new release)
```

### Minimal steps to reproduce
>

### Any log messages given by the failure
>

### Expected/desired behavior
>

### OS and Version?
> Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?)

### azd version?
> run `azd version` and copy paste here.

### Versions
>

### Mention any other details that might be useful

> ---------------------------------------------------------------
> Thanks! We'll be in touch soon.


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Purpose

<!-- Describe the intention of the changes being proposed. What problem does it solve or functionality does it add? -->


## Does this introduce a breaking change?

When developers merge from main and run the server, azd up, or azd deploy, will this produce an error?
If you're not sure, try it out on an old environment.

```
[ ] Yes
[ ] No
```

## Does this require changes to learn.microsoft.com docs?

This repository is referenced by [this tutorial](https://learn.microsoft.com/azure/developer/python/get-started-app-chat-template)
which includes deployment, settings and usage instructions. If text or screenshots need to change in the tutorial,
check the box below and notify the tutorial author. A Microsoft employee can do this for you if you're an external contributor.

```
[ ] Yes
[ ] No
```

## Type of change

```
[ ] Bugfix
[ ] Feature
[ ] Code style update (formatting, local variables)
[ ] Refactoring (no functional changes, no api changes)
[ ] Documentation content changes
[ ] Other... Please describe:
```

## Code quality checklist

See [CONTRIBUTING.md](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/CONTRIBUTING.md#submit-pr) for more details.

- [ ] The current tests all pass (`python -m pytest`).
- [ ] I added tests that prove my fix is effective or that my feature works
- [ ] I ran `python -m pytest --cov` to verify 100% coverage of added lines
- [ ] I ran `ty check` to check for type errors
- [ ] I either used the pre-commit hooks or ran `ruff` and `black` manually on my code.


================================================
FILE: .github/agents/fixer.agent.md
================================================
---
description: 'Fix and verify issues in app'
tools: ['vscode', 'execute', 'read', 'edit', 'search', 'web', 'agent', 'azure-mcp/search', 'github/create_pull_request', 'github/issue_read', 'github/list_issues', 'github/search_issues', 'playwright/*', 'pylance-mcp-server/*', 'microsoftdocs/mcp/*']
---

# Fixer Mode Instructions

You are in fixer mode. When given an issue to fix, follow these steps:

1. **Gather context**: Read error messages/stack traces/related code. If the issue is a GitHub issue link, use the `github/issue_read` tool to fetch the issue and its comments.
2. **Make targeted fix**: Make minimal changes to fix the issue. Do not fix any issues that weren't identified. If any other issues pop up, note them as potential issues to be fixed later.
3. **Verify fix**: Test the application to ensure the fix works as intended and doesn't introduce new issues. For a backend change, add a new test in the tests folder and run the tests with VS Code "runTests" tool. RUN all the tests using that tool, not just the tests you added. Try to add tests to existing test files when possible, like test_app.py. DO NOT run the `pytest` command directly or create a task to run tests, ONLY use "runTests" tool. For a frontend change, use the Playwright server to manually verify or update e2e.py tests.

## Local server setup

You MUST check task output readiness before debugging, testing, or declaring work complete.

- Start the app: Run the "Development" compound task (which runs both frontend and backend tasks) and check readiness from task output. Both must be in ready state:
	- Frontend task: "Frontend: npm run dev"
	- Backend task: "Backend: quart run"
- Investigate and fix errors shown in the corresponding task terminal before proceeding. You may sometimes see an error with /auth_setup in the frontend task; that's due to the backend server taking longer to start up, and can be ignored.
- Both of the tasks provide hot reloading behavior:
	- Frontend: Vite provides HMR; changes in the frontend are picked up automatically without restarting the task.
	- Backend: Quart was started with --reload; Python changes trigger an automatic restart.
	- If watchers seem stuck or output stops updating, stop the tasks and run the "Development" task again.
- To interact with a running application, use the Playwright MCP server. If testing login, you will need to navigate to 'localhost' instead of '127.0.0.1' since that's the URL allowed by the Entra application.

## Running Python scripts

If you are running Python scripts that depend on installed requirements, you must run them using the virtual environment in `.venv`.

## Committing the change

When the change is complete, offer to make a new branch, git commit, and pull request.
DO NOT check out a new branch unless explicitly confirmed - sometimes the user is already on a branch.

## Making the PR

* Use the `github/create_pull_request` tool to create the PR.
* Follow the `.github/PULL_REQUEST_TEMPLATE.md` format, with all sections filled out and appropriate checkboxes checked. If any section does not apply, write "N/A" in that section.
* Include a "Fixes #<issue number>" sentence in the PR description to auto-close the issue when the PR is merged.


================================================
FILE: .github/agents/triager.agent.md
================================================
---
description: 'Triage old stale issues for obsolescence and recommend closures'
tools: ['edit', 'search/usages', 'web', 'azure-mcp/search', 'github/add_issue_comment', 'github/get_commit', 'github/get_file_contents', 'github/get_latest_release', 'github/get_me', 'github/get_release_by_tag', 'github/get_tag', 'github/issue_read', 'github/issue_write', 'github/list_branches', 'github/list_commits', 'github/list_issue_types', 'github/list_issues', 'github/list_pull_requests', 'github/list_releases', 'github/list_tags', 'github/pull_request_read', 'github/search_code', 'github/search_issues', 'github/search_pull_requests', 'github/search_repositories', 'github/search_users', 'github/assign_copilot_to_issue', 'todo']
---

# Issue Triager

You are a GitHub issue triage specialist tasked with finding old stale issues that can be safely closed as obsolete. DO NOT actually close them yourself unless specifically told to do so. Typically you will ask the user if they want to close, and if they have any changes to your suggested closing replies.

## Task Requirements

### Primary Objective
Find the specified number of stale issues in the Azure-Samples/azure-search-openai-demo repository that can be closed due to being obsolete or resolved by subsequent improvements.

### Analysis Process
1. **Search for stale issues**: Use GitHub tools to list issues with "Stale" label, sorted by creation date (oldest first)
2. **Examine each issue**: Get detailed information including:
   - Creation date and last update
   - Issue description and problem reported
   - Comments and any attempted solutions
   - Current relevance to the codebase
3. **Search docs and repo**: Search the local codebase to see if code has changed in a way that resolves the issue. Also look at README.md and all the markdown files in /docs to see if the app provides more options that weren't available before.
4. **Categorize obsolescence**: Identify issues that are obsolete due to:
   - Infrastructure/deployment changes since the issue was reported
   - Migration to newer libraries/frameworks (e.g., OpenAI SDK updates)
   - Cross-platform compatibility improvements
   - Configuration system redesigns
   - API changes that resolve the underlying problem

### Output Format
For each recommended issue closure, provide:

1. **Issue Number and Title**
2. **GitHub Link**: Direct URL to the issue
3. **Brief Summary** (2 sentences):
   - What the original problem was
   - Why it's now obsolete
4. **Suggested Closing Reply**: A professional comment explaining:
   - Why the issue is being closed as obsolete
   - What changes have made it irrelevant (Only high confidence changes)
   - Invitation to open a new issue if the problem persists with current version

### Success Criteria
- Issues should be at least 1 year old
- Issues should have "Stale" label
- Must provide clear rationale for why each issue is obsolete
- Closing replies should be professional and helpful
- Focus on issues that won't recur with current codebase

### Constraints
- Do not recommend closing issues that represent ongoing valid feature requests
- Avoid closing issues that highlight fundamental design limitations
- Skip issues that could still affect current users even if less common
- Ensure the obsolescence is due to actual code/infrastructure changes, not just age

### Example Categories to Target
- Deployment failures from early 2023 that were fixed by infrastructure improvements
- Cross-platform compatibility issues resolved by script migrations
- API errors from old library versions that have been updated
- Configuration issues resolved by azd template redesigns
- Authentication/permissions errors fixed by improved role assignment logic


================================================
FILE: .github/dependabot.yaml
================================================
# Dependabot configuration: weekly version-update checks for the GitHub Actions,
# npm (frontend) and pip ecosystems used by this repository.
version: 2
updates:

  # Maintain dependencies for GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    # The "*" pattern puts every action update into one group,
    # so they arrive as a single pull request.
    groups:
      github-actions:
        patterns:
          - "*"

  # Maintain dependencies for npm
  - package-ecosystem: "npm"
    directory: "/app/frontend"
    schedule:
      interval: "weekly"
    # Ignore Vite 7 and later for now: Vite >=7 requires Node >=20.19.0 while project engines.node is currently >=20.0.0
    ignore:
      - dependency-name: "vite"
        versions:
          - ">=7.0.0"

  # Maintain dependencies for pip
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    # Each group below batches the packages matching its patterns into one pull request.
    groups:
      # OpenTelemetry and Azure Monitor OpenTelemetry packages
      telemetry:
        patterns:
          - "opentelemetry-*"
          - "azure-monitor-opentelemetry*"
      # pydantic, its companion packages, and typing-extensions
      pydantic:
        patterns:
          - "pydantic"
          - "pydantic-*"
          - "typing-extensions"
      # Flask/Quart and the Pallets libraries listed alongside them
      pallets:
        patterns:
          - "flask"
          - "werkzeug"
          - "blinker"
          - "quart"
          - "jinja2"
          - "click"
          - "itsdangerous"
          - "markupsafe"


================================================
FILE: .github/instructions/bicep.instructions.md
================================================
---
description: 'Infrastructure as Code with Bicep'
applyTo: '**/*.bicep'
---

# Bicep best-practices
This list of best-practices builds on top of information available at https://learn.microsoft.com/azure/azure-resource-manager/bicep. It provides a more opinionated and up-to-date set of rules for generating high-quality Bicep code. You should aim to follow these rules whenever generating or modifying Bicep code.

## Rules
### General
1. Avoid setting the `name` field for `module` statements - it is no longer required.
1. If you need to input or output a set of logically-grouped values, generate a single `param` or `output` statement with a User-defined type instead of emitting a `param` or `output` statement for each value.
1. If generating parameters, default to generating Bicep parameters files (`*.bicepparam`), instead of ARM parameters files (`*.json`).

### Resources
1. Do not add references from child resources to parent resources by using `/` characters in the child resource `name` property. Instead, use the `parent` property with a symbolic reference to the parent resource.
1. If you are generating a child resource type, sometimes this may require you to add an `existing` resource for the parent if the parent is not already present in the file.
1. If you see diagnostic codes `BCP036`, `BCP037` or `BCP081`, this may indicate you have hallucinated resource types or resource type properties. You may need to double-check against available resource type schema to tune your output.
1. Avoid using multiple `resourceId()` functions and `reference()` function where possible. Instead use symbolic names to refer to ids or properties, creating `existing` resources if needed. For example, write `foo.id` or `foo.properties.id`, instead of `resourceId('...')` or `reference('...').id`.

### Types
1. Avoid using open types such as `array` or `object` when referencing types where possible (e.g. in `output` or `param` statements). Instead, use User-defined types to define a more precise type.
1. Use typed variables instead of untyped variables when exporting values with the `@export()` decorator. For example, use `var foo string = 'blah'` instead of `var foo = 'blah'`.
1. When using User-defined types, aim to avoid repetition, and comment properties with `@description()` where the context is unclear.
1. If you are passing data directly to or from a resource body via a `param` or `output` statement, try to use existing Resource-derived types (`resourceInput<'type@version'>` and `resourceOutput<'type@version'>`) instead of writing User-defined types.

### Security
1. When generating `param` or `output` statements, ALWAYS use the `@secure()` decorator if sensitive data is present.

### Syntax
1. If you hit warnings or errors with null properties, prefer solving them with the safe-dereference (`.?`) operator, in conjunction with the coalesce (`??`) operator. For example, `a.?b ?? c` is better than `a!.b` which may cause runtime errors, or `a != null ? a.b : c` which is unnecessarily verbose.

## Glossary
* Child resource: an Azure resource type with a type name containing more than one `/` character. For example, `Microsoft.Network/virtualNetworks/subnets` is a child resource. `Microsoft.Network/virtualNetworks` is not.


================================================
FILE: .github/prompts/review_pr_comments.prompt.md
================================================
---
agent: agent
---
We have received comments on the current active pull request. Together, we will go through each comment one by one and discuss whether to accept the change, iterate on it, or reject the change.

## Steps to follow:

1. Fetch the active pull request: If available, use the `activePullRequest` tool from the `GitHub Pull Requests` toolset to get the details of the active pull request including the comments. If not, use the GitHub MCP server or GitHub CLI to get the details of the active pull request. Fetch both top level comments and inline comments.
2. Present a list of the comments with a one-sentence summary of each.
3. One at a time, present each comment in full detail and ask me whether to accept, iterate, or reject the change. Provide your recommendation for each comment based on best practices, code quality, and project guidelines. Await user's decision before proceeding to the next comment. DO NOT make any changes to the code or files until I have responded with my decision for each comment.
4. If the decision is to accept or iterate, make the necessary code changes to address the comment. If the decision is to reject, provide a brief explanation of why the change was not made.
5. Wait for user to affirm completion of any code changes made before moving to the next comment.
6. Reply to each comment on the pull request with the outcome of our discussion (accepted, iterated, or rejected) along with any relevant explanations.


================================================
FILE: .github/skills/github-pr-inline-reply/SKILL.md
================================================
---
name: github-pr-inline-reply
description: Reply to inline PR review comments on GitHub pull requests using the GitHub API. Use this skill when you need to respond to individual review comments on a PR, acknowledge feedback, or mark comments as resolved by posting direct replies to comment threads.
---

# GitHub PR Inline Reply Skill

This skill enables replying directly to inline review comments on GitHub pull requests.

## When to use

- Replying to individual PR review comments
- Acknowledging reviewer feedback on specific lines of code
- Marking review comments as addressed with a reply

## API Endpoint

To reply to an inline PR comment, use:

```http
POST /repos/{owner}/{repo}/pulls/{pull_number}/comments/{comment_id}/replies
```

With body:

```json
{
  "body": "Your reply message"
}
```

## Using gh CLI

```bash
gh api repos/{owner}/{repo}/pulls/{pull_number}/comments/{comment_id}/replies \
  -X POST \
  -f body="Your reply message"
```

## Workflow

1. **Get PR comments**: First fetch the PR review comments to get their IDs:

   ```bash
   gh api repos/{owner}/{repo}/pulls/{pull_number}/comments
   ```

2. **Identify comment IDs**: Each comment has an `id` field. For threaded comments, use the root comment's `id`.

3. **Post replies**: For each comment you want to reply to:

   ```bash
   gh api repos/{owner}/{repo}/pulls/{pull_number}/comments/{comment_id}/replies \
     -X POST \
     -f body="Fixed in commit abc123"
   ```

## Example Replies

For accepted changes:

- "Fixed in {commit_sha}"
- "Accepted - fixed in {commit_sha}"

For rejected changes:

- "Rejected - {reason}"
- "Won't fix - {explanation}"

For questions:

- "Good catch, addressed in {commit_sha}"

## Notes

- The `comment_id` is the numeric ID from the comment object, NOT the `node_id`
- Replies appear as threaded responses under the original comment
- You can reply to any comment, including bot comments (like Copilot reviews)

## Resolving Conversations

To resolve (mark as resolved) PR review threads, use the GraphQL API:

1. **Get thread IDs**: Query for unresolved threads:

   ```bash
   gh api graphql -f query='
   query {
     repository(owner: "{owner}", name: "{repo}") {
       pullRequest(number: {pull_number}) {
         reviewThreads(first: 50) {
           nodes {
             id
             isResolved
             comments(first: 1) {
               nodes { body path }
             }
           }
         }
       }
     }
   }'
   ```

2. **Resolve threads**: Use the `resolveReviewThread` mutation:

   ```bash
   gh api graphql -f query='
   mutation {
     resolveReviewThread(input: {threadId: "PRRT_xxx"}) {
       thread { isResolved }
     }
   }'
   ```

3. **Resolve multiple threads at once**:

   ```bash
   gh api graphql -f query='
   mutation {
     t1: resolveReviewThread(input: {threadId: "PRRT_xxx"}) { thread { isResolved } }
     t2: resolveReviewThread(input: {threadId: "PRRT_yyy"}) { thread { isResolved } }
   }'
   ```

The thread ID starts with `PRRT_` and can be found in the GraphQL query response.

Note: This skill can be removed once the GitHub MCP server has added built-in support for replying to PR review comments and resolving threads.
See:
https://github.com/github/github-mcp-server/issues/1323
https://github.com/github/github-mcp-server/issues/1768


================================================
FILE: .github/workflows/azure-dev-validation.yaml
================================================
# Validates the infrastructure templates. Runs on pushes and pull requests to
# main that touch files under infra/, and on manual dispatch.
name: Validate AZD template
on:
  push:
    branches: [ main ]
    paths:
      - "infra/**"
  pull_request:
    branches: [ main ]
    paths:
      - "infra/**"
  workflow_dispatch:

jobs:
  # Compiles infra/main.bicep so that Bicep build errors fail the workflow.
  bicep:
    runs-on: ubuntu-latest
    permissions:
      security-events: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Build Bicep for linting
        uses: azure/CLI@v2
        with:
          # NOTE(review): the invariant-globalization flag presumably lets the .NET-based
          # Bicep CLI run without ICU libraries in the CLI container - confirm.
          # use_binary_from_path=false makes Azure CLI use its own managed Bicep binary
          # instead of one found on PATH.
          inlineScript: |
            export DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
            az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout

  # Runs PSRule for Azure (security pillar baseline) against the infra/*.test.bicep
  # files and publishes the findings as SARIF.
  psrule:
    runs-on: ubuntu-latest
    permissions:
      # Needed by the upload-sarif step below.
      security-events: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Run PSRule analysis
        uses: microsoft/ps-rule@v2.9.0
        with:
          modules: PSRule.Rules.Azure
          baseline: Azure.Pillar.Security
          inputPath: infra/*.test.bicep
          outputFormat: Sarif
          outputPath: reports/ps-rule-results.sarif
          summary: true
        # Keep the job going when this step fails so the upload step below still runs.
        continue-on-error: true

        env:
          # Expand Bicep files before analysis; the timeout value is 30
          # (NOTE(review): presumably seconds - confirm against PSRule docs).
          PSRULE_CONFIGURATION_AZURE_BICEP_FILE_EXPANSION: 'true'
          PSRULE_CONFIGURATION_AZURE_BICEP_FILE_EXPANSION_TIMEOUT: '30'

      - name: Upload results to security tab
        uses: github/codeql-action/upload-sarif@v4
        # Only upload when running in the upstream repository, not in forks.
        if: github.repository == 'Azure-Samples/azure-search-openai-demo'
        with:
          sarif_file: reports/ps-rule-results.sarif


================================================
FILE: .github/workflows/azure-dev.yml
================================================
name: Deploy

on:
  workflow_dispatch:
  push:
    # Run when commits are pushed to mainline branch (main or master)
    # Set this to the mainline branch you are using
    branches:
      - main
      - master

# GitHub Actions workflow to deploy to Azure using azd
# To configure required secrets for connecting to Azure, simply run `azd pipeline config`

# Set up permissions for deploying with secretless Azure federated credentials
# https://learn.microsoft.com/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
permissions:
  id-token: write
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    env:
      # azd required
      AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
      AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
      AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
      AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }}
      AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
      # project specific
      AZURE_OPENAI_SERVICE: ${{ vars.AZURE_OPENAI_SERVICE }}
      AZURE_OPENAI_LOCATION: ${{ vars.AZURE_OPENAI_LOCATION }}
      AZURE_OPENAI_RESOURCE_GROUP: ${{ vars.AZURE_OPENAI_RESOURCE_GROUP }}
      AZURE_DOCUMENTINTELLIGENCE_SERVICE: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SERVICE }}
      AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }}
      AZURE_DOCUMENTINTELLIGENCE_SKU: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SKU }}
      AZURE_DOCUMENTINTELLIGENCE_LOCATION: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_LOCATION }}
      AZURE_VISION_SERVICE: ${{ vars.AZURE_VISION_SERVICE }}
      AZURE_VISION_RESOURCE_GROUP: ${{ vars.AZURE_VISION_RESOURCE_GROUP }}
      AZURE_VISION_LOCATION: ${{ vars.AZURE_VISION_LOCATION }}
      AZURE_VISION_SKU: ${{ vars.AZURE_VISION_SKU }}
      AZURE_SEARCH_INDEX: ${{ vars.AZURE_SEARCH_INDEX }}
      AZURE_SEARCH_SERVICE: ${{ vars.AZURE_SEARCH_SERVICE }}
      AZURE_SEARCH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SEARCH_SERVICE_RESOURCE_GROUP }}
      AZURE_SEARCH_SERVICE_LOCATION: ${{ vars.AZURE_SEARCH_SERVICE_LOCATION }}
      AZURE_SEARCH_SERVICE_SKU: ${{ vars.AZURE_SEARCH_SERVICE_SKU }}
      AZURE_SEARCH_QUERY_LANGUAGE: ${{ vars.AZURE_SEARCH_QUERY_LANGUAGE }}
      AZURE_SEARCH_QUERY_SPELLER: ${{ vars.AZURE_SEARCH_QUERY_SPELLER }}
      AZURE_SEARCH_SEMANTIC_RANKER: ${{ vars.AZURE_SEARCH_SEMANTIC_RANKER }}
      AZURE_SEARCH_QUERY_REWRITING: ${{ vars.AZURE_SEARCH_QUERY_REWRITING }}
      AZURE_SEARCH_FIELD_NAME_EMBEDDING: ${{ vars.AZURE_SEARCH_FIELD_NAME_EMBEDDING }}
      AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
      AZURE_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_STORAGE_RESOURCE_GROUP }}
      AZURE_STORAGE_SKU: ${{ vars.AZURE_STORAGE_SKU }}
      AZURE_APP_SERVICE_PLAN: ${{ vars.AZURE_APP_SERVICE_PLAN }}
      AZURE_APP_SERVICE_SKU: ${{ vars.AZURE_APP_SERVICE_SKU }}
      AZURE_APP_SERVICE: ${{ vars.AZURE_APP_SERVICE }}
      AZURE_OPENAI_CHATGPT_MODEL: ${{ vars.AZURE_OPENAI_CHATGPT_MODEL }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION }}
      AZURE_OPENAI_REASONING_EFFORT: ${{ vars.AZURE_OPENAI_REASONING_EFFORT }}
      AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT: ${{ vars.AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT }}
      AZURE_OPENAI_EMB_MODEL_NAME: ${{ vars.AZURE_OPENAI_EMB_MODEL_NAME }}
      AZURE_OPENAI_EMB_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT }}
      AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_EMB_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_VERSION }}
      AZURE_OPENAI_EMB_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMB_DIMENSIONS }}
      USE_EVAL: ${{ vars.USE_EVAL }}
      AZURE_OPENAI_EVAL_MODEL: ${{ vars.AZURE_OPENAI_EVAL_MODEL }}
      AZURE_OPENAI_EVAL_MODEL_VERSION: ${{ vars.AZURE_OPENAI_EVAL_MODEL_VERSION }}
      AZURE_OPENAI_EVAL_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT }}
      AZURE_OPENAI_EVAL_DEPLOYMENT_SKU: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_SKU }}
      AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_DISABLE_KEYS: ${{ vars.AZURE_OPENAI_DISABLE_KEYS }}
      OPENAI_HOST: ${{ vars.OPENAI_HOST }}
      OPENAI_API_KEY: ${{ vars.OPENAI_API_KEY }}
      OPENAI_ORGANIZATION: ${{ vars.OPENAI_ORGANIZATION }}
      AZURE_USE_APPLICATION_INSIGHTS: ${{ vars.AZURE_USE_APPLICATION_INSIGHTS }}
      AZURE_APPLICATION_INSIGHTS: ${{ vars.AZURE_APPLICATION_INSIGHTS }}
      AZURE_APPLICATION_INSIGHTS_DASHBOARD: ${{ vars.AZURE_APPLICATION_INSIGHTS_DASHBOARD }}
      AZURE_LOG_ANALYTICS: ${{ vars.AZURE_LOG_ANALYTICS }}
      USE_VECTORS: ${{ vars.USE_VECTORS }}
      USE_MULTIMODAL: ${{ vars.USE_MULTIMODAL }}
      USE_CLOUD_INGESTION: ${{ vars.USE_CLOUD_INGESTION }}
      USE_CLOUD_INGESTION_ACLS: ${{ vars.USE_CLOUD_INGESTION_ACLS }}
      USE_EXISTING_ADLS_STORAGE: ${{ vars.USE_EXISTING_ADLS_STORAGE }}
      AZURE_ADLS_GEN2_STORAGE_ACCOUNT: ${{ vars.AZURE_ADLS_GEN2_STORAGE_ACCOUNT }}
      AZURE_ADLS_GEN2_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_ADLS_GEN2_STORAGE_RESOURCE_GROUP }}
      AZURE_VISION_ENDPOINT: ${{ vars.AZURE_VISION_ENDPOINT }}
      VISION_SECRET_NAME: ${{ vars.VISION_SECRET_NAME }}
      ENABLE_LANGUAGE_PICKER: ${{ vars.ENABLE_LANGUAGE_PICKER }}
      USE_SPEECH_INPUT_BROWSER: ${{ vars.USE_SPEECH_INPUT_BROWSER }}
      USE_SPEECH_OUTPUT_BROWSER: ${{ vars.USE_SPEECH_OUTPUT_BROWSER }}
      USE_SPEECH_OUTPUT_AZURE: ${{ vars.USE_SPEECH_OUTPUT_AZURE }}
      AZURE_SPEECH_SERVICE: ${{ vars.AZURE_SPEECH_SERVICE }}
      # Read the variable with the same name as the env var, like every other entry in this map
      AZURE_SPEECH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SPEECH_SERVICE_RESOURCE_GROUP }}
      AZURE_SPEECH_SERVICE_LOCATION: ${{ vars.AZURE_SPEECH_SERVICE_LOCATION }}
      AZURE_SPEECH_SERVICE_SKU: ${{ vars.AZURE_SPEECH_SERVICE_SKU }}
      AZURE_SPEECH_SERVICE_VOICE: ${{ vars.AZURE_SPEECH_SERVICE_VOICE }}
      AZURE_KEY_VAULT_NAME: ${{ vars.AZURE_KEY_VAULT_NAME }}
      AZURE_USE_AUTHENTICATION: ${{ vars.AZURE_USE_AUTHENTICATION }}
      AZURE_ENFORCE_ACCESS_CONTROL: ${{ vars.AZURE_ENFORCE_ACCESS_CONTROL }}
      AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: ${{ vars.AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS }}
      AZURE_ENABLE_UNAUTHENTICATED_ACCESS: ${{ vars.AZURE_ENABLE_UNAUTHENTICATED_ACCESS }}
      AZURE_AUTH_TENANT_ID: ${{ vars.AZURE_AUTH_TENANT_ID }}
      AZURE_SERVER_APP_ID: ${{ vars.AZURE_SERVER_APP_ID }}
      AZURE_CLIENT_APP_ID: ${{ vars.AZURE_CLIENT_APP_ID }}
      ALLOWED_ORIGIN: ${{ vars.ALLOWED_ORIGIN }}
      AZURE_ADLS_GEN2_FILESYSTEM: ${{ vars.AZURE_ADLS_GEN2_FILESYSTEM }}
      DEPLOYMENT_TARGET: ${{ vars.DEPLOYMENT_TARGET }}
      AZURE_CONTAINER_APPS_WORKLOAD_PROFILE: ${{ vars.AZURE_CONTAINER_APPS_WORKLOAD_PROFILE }}
      USE_CHAT_HISTORY_BROWSER: ${{ vars.USE_CHAT_HISTORY_BROWSER }}
      USE_MEDIA_DESCRIBER_AZURE_CU: ${{ vars.USE_MEDIA_DESCRIBER_AZURE_CU }}
      USE_AI_PROJECT: ${{ vars.USE_AI_PROJECT }}
      RAG_SEARCH_TEXT_EMBEDDINGS: ${{ vars.RAG_SEARCH_TEXT_EMBEDDINGS }}
      RAG_SEARCH_IMAGE_EMBEDDINGS: ${{ vars.RAG_SEARCH_IMAGE_EMBEDDINGS }}
      RAG_SEND_TEXT_SOURCES: ${{ vars.RAG_SEND_TEXT_SOURCES }}
      RAG_SEND_IMAGE_SOURCES: ${{ vars.RAG_SEND_IMAGE_SOURCES }}
      USE_AGENTIC_KNOWLEDGEBASE: ${{ vars.USE_AGENTIC_KNOWLEDGEBASE }}
      USE_WEB_SOURCE: ${{ vars.USE_WEB_SOURCE }}
      USE_SHAREPOINT_SOURCE: ${{ vars.USE_SHAREPOINT_SOURCE }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Install azd
        uses: Azure/setup-azd@v2.2.1

      - name: Install Nodejs
        uses: actions/setup-node@v6
        with:
          node-version: 20

      - name: Log in with Azure (Federated Credentials)
        run: |
          azd auth login `
            --client-id "$Env:AZURE_CLIENT_ID" `
            --federated-credential-provider "github" `
            --tenant-id "$Env:AZURE_TENANT_ID"
        shell: pwsh

      - name: Provision Infrastructure
        run: azd provision --no-prompt
        env:
          AZD_INITIAL_ENVIRONMENT_CONFIG: ${{ secrets.AZD_INITIAL_ENVIRONMENT_CONFIG }}
          AZURE_SERVER_APP_SECRET: ${{ secrets.AZURE_SERVER_APP_SECRET }}
          AZURE_CLIENT_APP_SECRET: ${{ secrets.AZURE_CLIENT_APP_SECRET }}

      - name: Deploy Application
        run: azd deploy --no-prompt


================================================
FILE: .github/workflows/evaluate.yaml
================================================
name: Evaluate RAG answer flow

on:
  issue_comment:
    types: [created]

# Set up permissions for deploying with secretless Azure federated credentials
# https://learn.microsoft.com/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
permissions:
  id-token: write
  contents: read
  issues: write
  pull-requests: write

jobs:
  evaluate:
    if: |
      contains('["OWNER", "CONTRIBUTOR", "COLLABORATOR", "MEMBER"]', github.event.comment.author_association) &&
      github.event.issue.pull_request &&
      github.event.comment.body == '/evaluate'
    runs-on: ubuntu-latest
    env:
      # azd required
      AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
      AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
      AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
      AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }}
      AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
      # project specific
      AZURE_OPENAI_SERVICE: ${{ vars.AZURE_OPENAI_SERVICE }}
      AZURE_OPENAI_LOCATION: ${{ vars.AZURE_OPENAI_LOCATION }}
      AZURE_OPENAI_RESOURCE_GROUP: ${{ vars.AZURE_OPENAI_RESOURCE_GROUP }}
      AZURE_DOCUMENTINTELLIGENCE_SERVICE: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SERVICE }}
      AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }}
      AZURE_DOCUMENTINTELLIGENCE_SKU: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SKU }}
      AZURE_DOCUMENTINTELLIGENCE_LOCATION: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_LOCATION }}
      AZURE_VISION_SERVICE: ${{ vars.AZURE_VISION_SERVICE }}
      AZURE_VISION_RESOURCE_GROUP: ${{ vars.AZURE_VISION_RESOURCE_GROUP }}
      AZURE_VISION_LOCATION: ${{ vars.AZURE_VISION_LOCATION }}
      AZURE_VISION_SKU: ${{ vars.AZURE_VISION_SKU }}
      AZURE_SEARCH_INDEX: ${{ vars.AZURE_SEARCH_INDEX }}
      AZURE_SEARCH_SERVICE: ${{ vars.AZURE_SEARCH_SERVICE }}
      AZURE_SEARCH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SEARCH_SERVICE_RESOURCE_GROUP }}
      AZURE_SEARCH_SERVICE_LOCATION: ${{ vars.AZURE_SEARCH_SERVICE_LOCATION }}
      AZURE_SEARCH_SERVICE_SKU: ${{ vars.AZURE_SEARCH_SERVICE_SKU }}
      AZURE_SEARCH_QUERY_LANGUAGE: ${{ vars.AZURE_SEARCH_QUERY_LANGUAGE }}
      AZURE_SEARCH_QUERY_SPELLER: ${{ vars.AZURE_SEARCH_QUERY_SPELLER }}
      AZURE_SEARCH_SEMANTIC_RANKER: ${{ vars.AZURE_SEARCH_SEMANTIC_RANKER }}
      AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
      AZURE_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_STORAGE_RESOURCE_GROUP }}
      AZURE_STORAGE_SKU: ${{ vars.AZURE_STORAGE_SKU }}
      AZURE_APP_SERVICE_PLAN: ${{ vars.AZURE_APP_SERVICE_PLAN }}
      AZURE_APP_SERVICE_SKU: ${{ vars.AZURE_APP_SERVICE_SKU }}
      AZURE_APP_SERVICE: ${{ vars.AZURE_APP_SERVICE }}
      AZURE_OPENAI_CHATGPT_MODEL: ${{ vars.AZURE_OPENAI_CHATGPT_MODEL }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION }}
      AZURE_OPENAI_EMB_MODEL_NAME: ${{ vars.AZURE_OPENAI_EMB_MODEL_NAME }}
      AZURE_OPENAI_EMB_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT }}
      AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_EMB_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_VERSION }}
      AZURE_OPENAI_EMB_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMB_DIMENSIONS }}
      USE_EVAL: ${{ vars.USE_EVAL }}
      AZURE_OPENAI_EVAL_MODEL: ${{ vars.AZURE_OPENAI_EVAL_MODEL }}
      AZURE_OPENAI_EVAL_MODEL_VERSION: ${{ vars.AZURE_OPENAI_EVAL_MODEL_VERSION }}
      AZURE_OPENAI_EVAL_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT }}
      AZURE_OPENAI_EVAL_DEPLOYMENT_SKU: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_SKU }}
      AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY }}
      AZURE_OPENAI_DISABLE_KEYS: ${{ vars.AZURE_OPENAI_DISABLE_KEYS }}
      OPENAI_HOST: ${{ vars.OPENAI_HOST }}
      OPENAI_API_KEY: ${{ vars.OPENAI_API_KEY }}
      OPENAI_ORGANIZATION: ${{ vars.OPENAI_ORGANIZATION }}
      AZURE_USE_APPLICATION_INSIGHTS: ${{ vars.AZURE_USE_APPLICATION_INSIGHTS }}
      AZURE_APPLICATION_INSIGHTS: ${{ vars.AZURE_APPLICATION_INSIGHTS }}
      AZURE_APPLICATION_INSIGHTS_DASHBOARD: ${{ vars.AZURE_APPLICATION_INSIGHTS_DASHBOARD }}
      AZURE_LOG_ANALYTICS: ${{ vars.AZURE_LOG_ANALYTICS }}
      USE_VECTORS: ${{ vars.USE_VECTORS }}
      USE_MULTIMODAL: ${{ vars.USE_MULTIMODAL }}
      AZURE_VISION_ENDPOINT: ${{ vars.AZURE_VISION_ENDPOINT }}
      VISION_SECRET_NAME: ${{ vars.VISION_SECRET_NAME }}
      ENABLE_LANGUAGE_PICKER: ${{ vars.ENABLE_LANGUAGE_PICKER }}
      USE_SPEECH_INPUT_BROWSER: ${{ vars.USE_SPEECH_INPUT_BROWSER }}
      USE_SPEECH_OUTPUT_BROWSER: ${{ vars.USE_SPEECH_OUTPUT_BROWSER }}
      USE_SPEECH_OUTPUT_AZURE: ${{ vars.USE_SPEECH_OUTPUT_AZURE }}
      AZURE_SPEECH_SERVICE: ${{ vars.AZURE_SPEECH_SERVICE }}
      AZURE_SPEECH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SPEECH_SERVICE_RESOURCE_GROUP }}
      AZURE_SPEECH_SERVICE_LOCATION: ${{ vars.AZURE_SPEECH_SERVICE_LOCATION }}
      AZURE_SPEECH_SERVICE_SKU: ${{ vars.AZURE_SPEECH_SERVICE_SKU }}
      AZURE_SPEECH_SERVICE_VOICE: ${{ vars.AZURE_SPEECH_SERVICE_VOICE }}
      AZURE_KEY_VAULT_NAME: ${{ vars.AZURE_KEY_VAULT_NAME }}
      AZURE_USE_AUTHENTICATION: ${{ vars.AZURE_USE_AUTHENTICATION }}
      AZURE_ENFORCE_ACCESS_CONTROL: ${{ vars.AZURE_ENFORCE_ACCESS_CONTROL }}
      AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: ${{ vars.AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS }}
      AZURE_ENABLE_UNAUTHENTICATED_ACCESS: ${{ vars.AZURE_ENABLE_UNAUTHENTICATED_ACCESS }}
      AZURE_AUTH_TENANT_ID: ${{ vars.AZURE_AUTH_TENANT_ID }}
      AZURE_SERVER_APP_ID: ${{ vars.AZURE_SERVER_APP_ID }}
      AZURE_CLIENT_APP_ID: ${{ vars.AZURE_CLIENT_APP_ID }}
      ALLOWED_ORIGIN: ${{ vars.ALLOWED_ORIGIN }}
      AZURE_ADLS_GEN2_STORAGE_ACCOUNT: ${{ vars.AZURE_ADLS_GEN2_STORAGE_ACCOUNT }}
      AZURE_ADLS_GEN2_FILESYSTEM: ${{ vars.AZURE_ADLS_GEN2_FILESYSTEM }}
      DEPLOYMENT_TARGET: ${{ vars.DEPLOYMENT_TARGET }}
      AZURE_CONTAINER_APPS_WORKLOAD_PROFILE: ${{ vars.AZURE_CONTAINER_APPS_WORKLOAD_PROFILE }}
      USE_CHAT_HISTORY_BROWSER: ${{ vars.USE_CHAT_HISTORY_BROWSER }}
      USE_MEDIA_DESCRIBER_AZURE_CU: ${{ vars.USE_MEDIA_DESCRIBER_AZURE_CU }}
      USE_AI_PROJECT: ${{ vars.USE_AI_PROJECT }}
    steps:

      # Acknowledge the "/evaluate" command right away so the commenter knows the run has started.
      - name: Comment on pull request
        uses: actions/github-script@v8
        with:
          script: |
            // Await the request so an API failure is reported by this step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: "Starting evaluation! Check the Actions tab for progress, or wait for a comment with the results."
            })

      # Check out the pull request's head ref; for a comment on a PR, the issue number
      # in the event payload is the PR number.
      # NOTE(review): this runs PR-supplied code in a job that holds id-token: write;
      # the author_association gate on the job is what limits who can trigger it.
      - name: Checkout pull request
        uses: actions/checkout@v6
        with:
          ref: refs/pull/${{ github.event.issue.number }}/head

      # Toolchain setup: uv 0.9.5 with Python 3.11 (cache keyed on the requirements*.txt files),
      # Node 20 for the frontend build, and the azd CLI.
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          version: "0.9.5"
          cache-dependency-glob: "requirements**.txt"
          python-version: "3.11"

      - name: Setup node
        uses: actions/setup-node@v6
        with:
          node-version: 20

      - name: Install azd
        uses: Azure/setup-azd@v2.2.1

      # Sign in to Azure using only the client, tenant and subscription IDs from the job env
      # (no client secret is passed), then select that subscription for later az commands.
      - name: Login to Azure with az CLI
        uses: azure/login@v2
        with:
          client-id: ${{ env.AZURE_CLIENT_ID }}
          tenant-id: ${{ env.AZURE_TENANT_ID }}
          subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

      - name: Set az account
        uses: azure/CLI@v2
        with:
          inlineScript: |
            az account set --subscription ${{env.AZURE_SUBSCRIPTION_ID}}

      # azd keeps its own auth state, so it is signed in separately from the az CLI.
      # The script is PowerShell (shell: pwsh): backticks are line continuations and
      # $Env:NAME reads an environment variable.
      - name: Login to Azure with azd (Federated Credentials)
        run: |
          azd auth login `
            --client-id "$Env:AZURE_CLIENT_ID" `
            --federated-credential-provider "github" `
            --tenant-id "$Env:AZURE_TENANT_ID"
        shell: pwsh

      # Runs `azd env refresh` for the environment named by AZURE_ENV_NAME, with the
      # initial environment config supplied from repository secrets.
      - name: Refresh azd environment variables
        run: |
          azd env refresh -e $AZURE_ENV_NAME --no-prompt
        env:
          AZD_INITIAL_ENVIRONMENT_CONFIG: ${{ secrets.AZD_INITIAL_ENVIRONMENT_CONFIG }}

      # Build the frontend from app/frontend, then install the Python development
      # requirements from the repository root.
      - name: Build frontend
        run: |
          cd ./app/frontend
          npm install
          npm run build

      - name: Install dependencies
        run: |
          uv pip install -r requirements-dev.txt

      # Start the Quart backend (main:app) on port 50505 in the background from app/backend.
      # stdout goes to serverlogs.out and stderr to serverlogs.err in that directory.
      # NOTE(review): clearing RUNNER_TRACKING_ID presumably stops the runner from killing
      # the detached process when this step ends — confirm.
      # NOTE(review): nothing here waits for the server to accept connections before the
      # evaluation step calls it.
      - name: Run local server in background
        run: |
          cd app/backend
          RUNNER_TRACKING_ID="" && (nohup python3 -m quart --app main:app run --port 50505 > serverlogs.out 2> serverlogs.err &)
          cd ../..

      # Install the evaluation-only requirements, then run the evaluation script against the
      # local server's /chat endpoint, writing results to a per-PR directory under evals/results.
      - name: Install evaluate dependencies
        run: |
          uv pip install -r evals/requirements.txt

      - name: Evaluate local RAG flow
        run: |
          python evals/evaluate.py --targeturl=http://127.0.0.1:50505/chat --resultsdir=evals/results/pr${{ github.event.issue.number }}

      # Publish this PR's evaluation output directory as the "eval_result" artifact.
      - name: Upload eval results as build artifact
        if: ${{ success() }}
        uses: actions/upload-artifact@v7
        with:
          name: eval_result
          path: ./evals/results/pr${{ github.event.issue.number }}

      # Server logs matter most when an earlier step failed, so upload them unless the run
      # was cancelled rather than only on success (the default step condition).
      - name: Upload server logs as build artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v7
        with:
          name: server_logs
          path: ./app/backend/serverlogs.out

      - name: Upload server error logs as build artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v7
        with:
          name: server_error_logs
          path: ./app/backend/serverlogs.err

      # Builds two markdown reports: eval-summary.md (summary over everything in evals/results)
      # and run-diff.md (diff between evals/results/baseline and this PR's results).
      # Both are appended to the job's step summary.
      - name: Summarize results
        if: ${{ success() }}
        run: |
          echo "## Evaluation results" >> eval-summary.md
          python -m evaltools summary evals/results --output=markdown >> eval-summary.md
          echo "## Answer differences across runs" >> run-diff.md
          python -m evaltools diff evals/results/baseline evals/results/pr${{ github.event.issue.number }} --output=markdown >> run-diff.md
          cat eval-summary.md >> $GITHUB_STEP_SUMMARY
          cat run-diff.md >> $GITHUB_STEP_SUMMARY

      # Post the contents of eval-summary.md back to the pull request, with a link to this run.
      - name: Comment on pull request
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const summaryPath = "eval-summary.md";
            const summary = fs.readFileSync(summaryPath, 'utf8');
            const runId = process.env.GITHUB_RUN_ID;
            const repo = process.env.GITHUB_REPOSITORY;
            const actionsUrl = `https://github.com/${repo}/actions/runs/${runId}`;
            // Await the request so an API failure is reported by this step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: `${summary}\n\n[Check the workflow run for more details](${actionsUrl}).`
            })


================================================
FILE: .github/workflows/frontend.yaml
================================================
name: Frontend linting

on:
  push:
    branches: [ main ]
    paths:
      - "app/frontend/**"
  pull_request:
    branches: [ main ]
    paths:
      - "app/frontend/**"

jobs:
  # Formatting gate for the frontend: installs its npm dependencies and fails if
  # prettier reports any file under app/frontend as unformatted.
  # NOTE(review): there is no setup-node step, so the Node version is whatever the
  # runner image provides.
  prettier:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Run prettier on frontend
        run: |
          cd ./app/frontend
          npm install
          npx prettier --check .


================================================
FILE: .github/workflows/lint-markdown.yml
================================================
name: Validate Markdown

on:
  pull_request:
    branches:
      - main
    paths:
      - '**.md'

jobs:
  # Lints Markdown with markdownlint-cli2 using the repository's .markdownlint-cli2.jsonc config.
  lint-markdown:
    name: Check for Markdown linting errors
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v6
      - name: Run markdownlint-cli2
        uses: DavidAnson/markdownlint-cli2-action@v22
        with:
          config: .markdownlint-cli2.jsonc
          # All Markdown files, except those under data/ and .github/ ("!" negates a glob).
          globs: |
            **/*.md
            !data/**
            !.github/**


================================================
FILE: .github/workflows/nightly-jobs.yaml
================================================
name: Nightly Jobs

on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

jobs:
  # Invokes the local python-test.yaml workflow as a reusable workflow, so the same
  # checks also run on the nightly schedule and on manual dispatch.
  python-test:
    uses: ./.github/workflows/python-test.yaml


================================================
FILE: .github/workflows/python-test.yaml
================================================
name: Python check

on:
  push:
    branches: [ main ]
    paths-ignore:
      - "**.md"
      - ".azdo/**"
      - ".devcontainer/**"
      - ".github/**"
  pull_request:
    branches: [ main ]
    paths-ignore:
      - "**.md"
      - ".azdo/**"
      - ".devcontainer/**"
      - ".github/**"
  workflow_call:

jobs:
  test_package:
    name: Test ${{ matrix.os }} Python ${{ matrix.python_version }} Node ${{ matrix.node_version }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every combination finish even if one fails.
      fail-fast: false
      # Full cross-product: 2 operating systems x 5 Python versions x 2 Node versions.
      matrix:
        os: ["ubuntu-latest", "windows-latest"]
        python_version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        node_version: ["20.14", "22"]
    steps:
        - uses: actions/checkout@v6
          with:
            # Fetch full history so diff-cover can compute a merge base with origin/main
            fetch-depth: 0
        # uv 0.9.5 with the matrix Python version; activate-environment is enabled so the
        # tools installed below (ruff, ty, black, pytest, ...) can be invoked directly.
        - name: Install uv
          uses: astral-sh/setup-uv@v7
          with:
            enable-cache: true
            version: "0.9.5"
            cache-dependency-glob: "requirements**.txt"
            python-version: ${{ matrix.python_version }}
            activate-environment: true
        - name: Setup node
          uses: actions/setup-node@v6
          with:
            node-version: ${{ matrix.node_version }}
        - name: Build frontend
          run: |
            cd ./app/frontend
            npm install
            npm run build
        # Checks the locale files under app/frontend/src/locales, using "en" as the source locale.
        - name: Check i18n translations
          run: npx -y @lingual/i18n-check@0.8.12 --locales app/frontend/src/locales -s en -f i18next -r summary
        - name: Install dependencies
          run: |
            uv pip install -r requirements-dev.txt
        # Static checks run on every OS in the matrix: lint, type check, formatting.
        - name: Lint with ruff
          run: ruff check .
        - name: Check types with ty
          run: ty check
        - name: Check formatting with black
          run: black . --check --verbose
        # The test steps below are skipped on Windows runners.
        # Unit/integration tests; the run fails if total coverage is below 90%.
        - name: Run Python tests
          if: runner.os != 'Windows'
          run: pytest -s -vv --cov --cov-report=xml --cov-fail-under=90
        # Coverage of changed lines only, compared against the PR base branch.
        # github.base_ref is empty outside pull_request events, so fall back to "main".
        - name: Check diff coverage
          if: runner.os != 'Windows'
          run: |
            BASE_REF="${{ github.base_ref }}"
            if [ -z "$BASE_REF" ]; then BASE_REF="main"; fi
            git fetch origin "$BASE_REF:refs/remotes/origin/$BASE_REF"
            diff-cover coverage.xml --compare-branch="origin/$BASE_REF" --fail-under=90
        # Browser tests in Chromium; the step id lets the upload step below detect
        # whether this specific step failed.
        - name: Run E2E tests with Playwright
          id: e2e
          if: runner.os != 'Windows'
          run: |
            playwright install chromium --with-deps
            pytest tests/e2e.py --tracing=retain-on-failure
        # Upload Playwright traces only when the E2E step itself failed.
        # The artifact name includes every matrix dimension: artifact names must be unique
        # within a workflow run, and jobs that differ only in Node version would otherwise
        # collide on the same name.
        - name: Upload test artifacts
          if: ${{ failure() && steps.e2e.conclusion == 'failure' }}
          uses: actions/upload-artifact@v7
          with:
            name: playwright-traces-${{ matrix.os }}-py${{ matrix.python_version }}-node${{ matrix.node_version }}
            path: test-results


================================================
FILE: .github/workflows/stale-bot.yml
================================================
name: 'Close stale issues and PRs'
on:
  schedule:
    - cron: '30 1 * * *'

jobs:
  # Marks issues and pull requests as stale after 60 days without activity.
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v10
        with:
          stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this issue will be closed.'
          stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed.'
          close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.'
          close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
          days-before-issue-stale: 60
          days-before-pr-stale: 60
          # NOTE(review): both close thresholds are -1, which presumably disables automatic
          # closing in actions/stale. If so, the "will be closed" wording in the stale messages
          # and the close-* messages (7 / 10 days) do not match the configured behavior — confirm
          # whether closing is meant to stay disabled and align the messages.
          days-before-issue-close: -1
          days-before-pr-close: -1


================================================
FILE: .github/workflows/validate-markdown.yml
================================================
name: Validate Markdown

on:
  # Trigger the workflow on pull request
  pull_request_target:
    branches:
      - main
    paths:
      - '**.md'
      - '**.ipynb'

permissions:
  contents: read
  pull-requests: write

jobs:
  check-broken-paths:
    name: Check Broken Relative Paths
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Check broken Paths
        id: check-broken-paths
        uses: john0isaac/action-check-markdown@v1.1.0
        with:
          command: check_broken_paths
          directory: ./
          guide-url: 'https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/CONTRIBUTING.md'
          github-token: ${{ secrets.GITHUB_TOKEN }}
  check-urls-locale:
    if: ${{ always() }}
    needs: check-broken-paths
    name: Check URLs Don't Have Locale
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Run Check URLs Country Locale
        id: check-urls-locale
        uses: john0isaac/action-check-markdown@v1.1.0
        with:
          command: check_urls_locale
          directory: ./
          guide-url: 'https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/CONTRIBUTING.md'
          github-token: ${{ secrets.GITHUB_TOKEN }}
  check-broken-urls:
    if: ${{ always() }}
    name: Check Broken URLs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Run Check Broken URLs
        id: check-broken-urls
        uses: john0isaac/action-check-markdown@v1.1.0
        with:
          command: check_broken_urls
          directory: ./
          guide-url: 'https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/CONTRIBUTING.md'
          github-token: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .gitignore
================================================
# Azure az webapp deployment details
.azure
*_env

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
coverage_report.html

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
.evalenv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# ty
.ty_cache/

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# NPM
npm-debug.log*
node_modules
static/

app/functions/*/prepdocslib/
app/functions/*/requirements.txt

data/**/*.md5

.DS_Store


================================================
FILE: .markdownlint-cli2.jsonc
================================================
{
    "config": {
        "default": true,
        "line-length": false,
        "table-column-style": false,
        "MD033": { "allowed_elements": ["br", "details", "summary"] }
    }
}


================================================
FILE: .pre-commit-config.yaml
================================================
# Files under tests/snapshots/ are excluded from every hook.
exclude: '^tests/snapshots/'
repos:
# Generic hygiene: YAML validity, final newline, trailing whitespace.
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
    -   id: check-yaml
    -   id: end-of-file-fixer
    -   id: trailing-whitespace
# Python linting.
-   repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.2
    hooks:
    -   id: ruff
# Python formatting.
-   repo: https://github.com/psf/black
    rev: 26.1.0
    hooks:
    -   id: black
# Frontend formatting, limited to the listed file types.
-   repo: https://github.com/pre-commit/mirrors-prettier
    rev: v3.1.0
    hooks:
    -   id: prettier
        types_or: [css, javascript, ts, tsx, html]


================================================
FILE: .vscode/extensions.json
================================================
{
    "recommendations": [
        "ms-azuretools.azure-dev",
        "ms-azuretools.vscode-bicep",
        "ms-python.python",
        "astral-sh.ty",
        "esbenp.prettier-vscode",
        "DavidAnson.vscode-markdownlint"
    ]
}


================================================
FILE: .vscode/launch.json
================================================
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            // Runs the Quart backend (main:app) under debugpy from app/backend on port 50505.
            "name": "Backend (Python)",
            "type": "debugpy",
            "request": "launch",
            "module": "quart",
            "cwd": "${workspaceFolder}/app/backend",
            // Use the currently selected interpreter in VS Code. MAC/Linux use /bin, Windows uses /Scripts
            "python": "${command:python.interpreterPath}",
            "env": {
                "QUART_APP": "main:app",
                "QUART_ENV": "development",
                "QUART_DEBUG": "0",
                // Set this to "no-override" if you want env vars here to override AZD env vars
                "LOADING_MODE_FOR_AZD_ENV_VARS": "override"
            },
            // Each array element is passed as a separate argv entry, so the option and
            // its value are listed separately.
            "args": [
                "run",
                "--no-reload",
                "-p",
                "50505"
            ],
            "console": "integratedTerminal",
            "justMyCode": false
        },
        {
            // Runs the frontend dev server via "npm run dev" in app/frontend.
            "name": "Frontend",
            "type": "node-terminal",
            "request": "launch",
            "command": "npm run dev",
            "cwd": "${workspaceFolder}/app/frontend"
        },
        {
            // Used when debugging a Python test from the test explorer ("debug-test" purpose).
            "name": "Tests (Python)",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "purpose": ["debug-test"],
            "console": "integratedTerminal",
            "justMyCode": false
        }
    ],
    "compounds": [
        {
            // Launches backend and frontend together; stopping one stops both.
            "name": "Frontend & Backend",
            "configurations": ["Backend (Python)", "Frontend"],
            "stopAll": true
        }
    ]
}


================================================
FILE: .vscode/settings.json
================================================
{
    "python.languageServer": "None", // Disabling due to ty using its own full-featured language server
    "[javascript]": {
        "editor.defaultFormatter": "esbenp.prettier-vscode",
        "editor.formatOnSave": true
    },
    "[typescript]": {
        "editor.defaultFormatter": "esbenp.prettier-vscode",
        "editor.formatOnSave": true
    },
    "[typescriptreact]": {
        "editor.defaultFormatter": "esbenp.prettier-vscode",
        "editor.formatOnSave": true
    },
    "[css]": {
        "editor.defaultFormatter": "esbenp.prettier-vscode",
        "editor.formatOnSave": true
    },
    "files.exclude": {
        "**/__pycache__": true,
        "**/.coverage": true,
        "**/.pytest_cache": true,
        "**/.ruff_cache": true,
        "**/.mypy_cache": true,
        "**/.ty_cache": true
    },
    "search.exclude": {
        "**/node_modules": true,
        "static": true
    },
    "python.testing.pytestArgs": [
        "tests"
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true
}


================================================
FILE: .vscode/tasks.json
================================================
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Start App",
            "type": "shell",
            "command": "${workspaceFolder}/app/start.sh",
            "windows": {
                "command": "pwsh ${workspaceFolder}/app/start.ps1"
            },
            "presentation": {
                "reveal": "silent"
            },
            "options": {
                "cwd": "${workspaceFolder}/app"
            },
            "problemMatcher": []
        },
        {
            "label": "Development",
            "dependsOn": [
                "Frontend: npm run dev",
                "Backend: quart run"
            ],
            "group": {
                "kind": "build",
                "isDefault": true
            }
        },
        {
            "label": "Frontend: npm run dev",
            "type": "npm",
            "script": "dev",
            "isBackground": true,
            "options": {
                "cwd": "${workspaceFolder}/app/frontend"
            },
            "presentation": {
                "reveal": "always",
                "group": "buildWatchers",
                "panel": "dedicated",
                "clear": false
            },
            "problemMatcher": {
                "pattern": {
                    "regexp": ""
                },
                "background": {
                    "activeOnStart": true,
                    "beginsPattern": ".*VITE v.*",
                    "endsPattern": ".*(?:➜\\s*)?Local:\\s+https?://.*"
                }
            }
        },
        {
            "label": "Backend: quart run",
            "type": "shell",
            "command": "${workspaceFolder}/.venv/bin/python",
            "windows": {
                "command": "${workspaceFolder}\\.venv\\Scripts\\python.exe"
            },
            "args": ["-m", "quart", "run", "--reload", "-p", "50505"],
            "options": {
                "cwd": "${workspaceFolder}/app/backend",
                "env": {
                    "QUART_APP": "main:app",
                    "QUART_ENV": "development",
                    "QUART_DEBUG": "0",
                    "LOADING_MODE_FOR_AZD_ENV_VARS": "override"
                }
            },
            "isBackground": true,
            "presentation": {
                "reveal": "always",
                "group": "buildWatchers",
                "panel": "dedicated"
            },
            "problemMatcher": {
                "pattern": { "regexp": "" },
                "background": {
                    "activeOnStart": true,
                    "beginsPattern": ".*Serving Quart app.*",
                    "endsPattern": ".*hypercorn.*Running on http://.*"
                }
            }
        }
    ]
}


================================================
FILE: AGENTS.md
================================================
# Instructions for Coding Agents

This file contains instructions for developers working on the Azure Search and OpenAI demo application. It covers the overall code layout, how to add new data, how to add new azd environment variables, how to add new developer settings, and how to add tests for new features.

Always keep this file up to date with any changes to the codebase or development process.
If necessary, edit this file to ensure it accurately reflects the current state of the project.

## Overall code layout

* app: Contains the main application code, including frontend and backend.
  * app/backend: Contains the Python backend code, written with the Quart framework.
    * app/backend/approaches: Contains the different approaches
      * app/backend/approaches/approach.py: Base class for all approaches
      * app/backend/approaches/chatreadretrieveread.py: Chat approach, includes query rewriting step first
      * app/backend/approaches/promptmanager.py: Manages loading and rendering of Jinja2 prompt templates
      * app/backend/approaches/prompts/query_rewrite.system.jinja2: Jinja2 template used to rewrite the query, based on search history, into a better search query
      * app/backend/approaches/prompts/chat_query_rewrite_tools.json: Tools used by the query rewriting prompt
      * app/backend/approaches/prompts/chat_answer.system.jinja2: Jinja2 template for the system message used by the Chat approach to answer questions
      * app/backend/approaches/prompts/chat_answer.user.jinja2: Jinja2 template for the user message used by the Chat approach, including sources
    * app/backend/prepdocslib: Contains the document ingestion library used by both local and cloud ingestion
      * app/backend/prepdocslib/blobmanager.py: Manages uploads to Azure Blob Storage
      * app/backend/prepdocslib/cloudingestionstrategy.py: Builds the Azure AI Search indexer and skillset for the cloud ingestion pipeline
      * app/backend/prepdocslib/csvparser.py: Parses CSV files
      * app/backend/prepdocslib/embeddings.py: Generates embeddings for text and images using Azure OpenAI
      * app/backend/prepdocslib/figureprocessor.py: Generates figure descriptions for both local ingestion and the cloud figure-processor skill
      * app/backend/prepdocslib/fileprocessor.py: Orchestrates parsing and chunking of individual files
      * app/backend/prepdocslib/filestrategy.py: Strategy for uploading and indexing files (local ingestion)
      * app/backend/prepdocslib/htmlparser.py: Parses HTML files
      * app/backend/prepdocslib/integratedvectorizerstrategy.py: Strategy using Azure AI Search integrated vectorization
      * app/backend/prepdocslib/jsonparser.py: Parses JSON files
      * app/backend/prepdocslib/listfilestrategy.py: Lists files from local filesystem or Azure Data Lake
      * app/backend/prepdocslib/mediadescriber.py: Interfaces for describing images (Azure OpenAI GPT-4o, Content Understanding)
      * app/backend/prepdocslib/page.py: Data classes for pages, images, and chunks
      * app/backend/prepdocslib/parser.py: Base parser interface
      * app/backend/prepdocslib/pdfparser.py: Parses PDFs using Azure Document Intelligence or local parser
      * app/backend/prepdocslib/searchmanager.py: Manages Azure AI Search index creation and updates
      * app/backend/prepdocslib/servicesetup.py: Shared service setup helpers for OpenAI, embeddings, blob storage, etc.
      * app/backend/prepdocslib/strategy.py: Base strategy interface for document ingestion
      * app/backend/prepdocslib/textparser.py: Parses plain text and markdown files
      * app/backend/prepdocslib/textprocessor.py: Processes text chunks for cloud ingestion (merges figures, generates embeddings)
      * app/backend/prepdocslib/textsplitter.py: Splits text into chunks using different strategies
    * app/backend/app.py: The main entry point for the backend application.
  * app/functions: Azure Functions used for cloud ingestion custom skills (document extraction, figure processing, text processing). Each function bundles a synchronized copy of `prepdocslib`; run `python scripts/copy_prepdocslib.py` to refresh the local copies if you modify the library.
  * app/frontend: Contains the React frontend code, written in TypeScript and built with Vite.
    * app/frontend/src/api: Contains the API client code for communicating with the backend.
    * app/frontend/src/components: Contains the React components for the frontend.
    * app/frontend/src/locales: Contains the translation files for internationalization.
      * app/frontend/src/locales/da/translation.json: Danish translations
      * app/frontend/src/locales/en/translation.json: English translations
      * app/frontend/src/locales/es/translation.json: Spanish translations
      * app/frontend/src/locales/fr/translation.json: French translations
      * app/frontend/src/locales/it/translation.json: Italian translations
      * app/frontend/src/locales/ja/translation.json: Japanese translations
      * app/frontend/src/locales/nl/translation.json: Dutch translations
      * app/frontend/src/locales/ptBR/translation.json: Brazilian Portuguese translations
      * app/frontend/src/locales/tr/translation.json: Turkish translations
    * app/frontend/src/pages: Contains the main pages of the application
* infra: Contains the Bicep templates for provisioning Azure resources.
* tests: Contains the test code, including e2e tests, app integration tests, and unit tests.

## Adding new data

New files should be added to the `data` folder. Then run either `scripts/prepdocs.sh` or `scripts/prepdocs.ps1` to ingest the data.

## Adding a new azd environment variable

An azd environment variable is stored by the azd CLI for each environment. It is passed to the "azd up" command and can configure both provisioning options and application settings.
When adding new azd environment variables, update:

1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable
1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `appEnvVariables` object
1. .azdo/pipelines/azure-dev.yml: Add the new environment variable under `env` section
1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section

You may also need to update:

1. app/backend/prepdocs.py: If the variable is used in the ingestion script, retrieve it from environment variables here. Not always needed.
1. app/backend/app.py: If the variable is used in the backend application, retrieve it from environment variables in setup_clients() function. Not always needed.

## Adding a new setting to "Developer Settings" in RAG app

When adding a new developer setting, update:

* frontend:
  * app/frontend/src/api/models.ts : Add to ChatAppRequestOverrides
  * app/frontend/src/components/Settings.tsx : Add a UI element for the setting
  * app/frontend/src/locales/*/translation.json: Add a translation for the setting label/tooltip for all languages
  * app/frontend/src/pages/chat/Chat.tsx: Add the setting to the component, pass it to Settings

* backend:
  * app/backend/approaches/chatreadretrieveread.py :  Retrieve from overrides parameter
  * app/backend/app.py: Some settings may need to be sent down in the /config route.

## When adding tests for a new feature

All tests are in the `tests` folder and use the pytest framework.
There are three styles of tests:

* e2e tests: These use playwright to run the app in a browser and test the UI end-to-end. They are in e2e.py and they mock the backend using the snapshots from the app tests. (Before running e2e tests, make sure to run `npm run build` in app/frontend first to build the frontend code.)
* app integration tests: Mostly in test_app.py, these test the app's API endpoints and use mocks for services like Azure OpenAI and Azure Search.
* unit tests: The rest of the tests are unit tests that test individual functions and methods. They are in test_*.py files.

When adding a new feature, add tests for it in the appropriate file.
If the feature is a UI element, add an e2e test for it.
If it is an API endpoint, add an app integration test for it.
If it is a function or method, add a unit test for it.
Use mocks from tests/conftest.py to mock external services. Prefer mocking at the HTTP/requests level when possible.

When you're running tests, make sure you activate the .venv virtual environment first:

```shell
source .venv/bin/activate
```

To check for coverage, run the following command:

```shell
pytest --cov --cov-report=annotate:cov_annotate
```

Open the cov_annotate directory to view the annotated source code. There will be one file per source file. If a file has 100% source coverage, it means all lines are covered by tests, so you do not need to open the file.

For each file that has less than 100% test coverage, find the matching file in cov_annotate and review the file.

If a line starts with a ! (exclamation mark), it means that the line is not covered by tests. Add tests to cover the missing lines.

## Sending pull requests

When sending pull requests, make sure to follow the PULL_REQUEST_TEMPLATE.md format.

## Upgrading dependencies

### Python backend dependencies

To upgrade a particular package in the backend, use the following command, replacing `<package-name>` with the name of the package you want to upgrade:

```shell
cd app/backend && uv pip compile requirements.in -o requirements.txt --python-version 3.10 --upgrade-package <package-name>
```

After upgrading, run tests to verify compatibility:

```shell
source .venv/bin/activate
pytest tests/
```

### npm frontend dependencies

To upgrade a particular package in the frontend:

1. **Navigate to the frontend directory**:

   ```shell
   cd app/frontend
   ```

2. **Upgrade the package** (replace `<package-name>` with the package you want to upgrade):

   ```shell
   npm install <package-name>@latest
   ```

3. **Build the frontend** to verify the upgrade works:

   ```shell
   npm run build
   ```

4. **Run all tests** to ensure nothing broke:

   ```shell
   # Run e2e tests from the root directory
   cd ../..
   source .venv/bin/activate
   pytest tests/e2e.py
   ```

5. **Commit changes** if the upgrade is successful:

   ```shell
   git add package.json package-lock.json
   git commit -m "chore: upgrade <package-name> to <version>"
   ```

**Important notes for frontend upgrades**:

* When upgrading React or related core packages, you may need to upgrade multiple packages together (e.g., `react`, `react-dom`, `@types/react`, `@types/react-dom`)
* Some upgrades may require code changes for API compatibility - check the package's changelog
* For major version upgrades of UI libraries like Fluent UI or MSAL, review breaking changes carefully. Manual tests are required for any MSAL changes since the E2E tests do not cover authentication flows.
* If npm reports peer dependency conflicts, the `.npmrc` file has `legacy-peer-deps=true` which allows the install to proceed. This is currently needed because `react-helmet-async` declares peer dependencies on React 17/18, but works fine with React 19.

## Checking Python type hints

To check Python type hints, use the following command:

```shell
ty check
```

Note that we do not currently enforce type hints in the tests folder, as it would require adding a lot of `# type: ignore` comments to the existing tests.
We only enforce type hints in the main application code and scripts.

## Python code style

Do not use single underscores in front of "private" methods or variables in Python code. We do not follow that convention in this codebase, since this is an application and not a library.

## Deploying the application

To deploy the application, use the `azd` CLI tool. Make sure you have the latest version of the `azd` CLI installed. Then, run the following command from the root of the repository:

```shell
azd up
```

That command will BOTH provision the Azure resources AND deploy the application code.

If you only changed the Bicep templates and want to re-provision the Azure resources, run:

```shell
azd provision
```

If you only changed the application code and want to re-deploy the code, run:

```shell
azd deploy
```

If you are using cloud ingestion and only want to deploy individual functions, run the necessary deploy commands, for example:

```shell
azd deploy document-extractor
azd deploy figure-processor
azd deploy text-processor
```


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

This project welcomes contributions and suggestions.  Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.

When you submit a pull request, a CLA bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

- [Submitting a Pull Request (PR)](#submitting-a-pull-request-pr)
- [Setting up the development environment](#setting-up-the-development-environment)
- [Running unit tests](#running-unit-tests)
- [Running E2E tests](#running-e2e-tests)
- [Code style](#code-style)
- [Adding new features](#adding-new-features)
  - [Adding new azd environment variables](#adding-new-azd-environment-variables)
  - [Adding new UI strings](#adding-new-ui-strings)

## Submitting a Pull Request (PR)

Before you submit your Pull Request (PR) consider the following guidelines:

- Search the repository (<https://github.com/Azure-Samples/azure-search-openai-demo/pulls>) for an open or closed PR
  that relates to your submission. You don't want to duplicate effort.
- Make your changes in a new git fork
- Follow [Code style conventions](#code-style)
- [Run the tests](#running-unit-tests) (and write new ones, if needed)
- Commit your changes using a descriptive commit message
- Push your fork to GitHub
- In GitHub, create a pull request to the `main` branch of the repository
- Ask a maintainer to review your PR and address any comments they might have

## Setting up the development environment

Install the development dependencies:

```shell
python -m pip install -r requirements-dev.txt
```

Install the pre-commit hooks:

```shell
pre-commit install
```

Compile the JavaScript:

```shell
( cd ./app/frontend ; npm install ; npm run build )
```

## Running unit tests

Run the tests:

```shell
python -m pytest
```

If test snapshots need updating (and the changes are expected), you can update them by running:

```shell
python -m pytest --snapshot-update
```

Once tests are passing, generate a coverage report to make sure your changes are covered:

```shell
pytest --cov --cov-report=xml && \
diff-cover coverage.xml --html-report coverage_report.html && \
open coverage_report.html
```

## Running E2E tests

Install Playwright browser dependencies:

```shell
playwright install --with-deps
```

Run the tests:

```shell
python -m pytest tests/e2e.py --tracing=retain-on-failure
```

When a failure happens, the trace zip will be saved in the test-results folder.
You can view that using the Playwright CLI:

```shell
playwright show-trace test-results/<trace-zip>
```

You can also use the online trace viewer at <https://trace.playwright.dev/>

## Code style

This codebase includes several languages: TypeScript, Python, Bicep, PowerShell, and Bash.
Code should follow the standard conventions of each language.

For Python, you can enforce the conventions using `ruff` and `black`.

Install the development dependencies:

```shell
python -m pip install -r requirements-dev.txt
```

Run `ruff` to lint a file:

```shell
python -m ruff check <path-to-file>
```

Run `black` to format a file:

```shell
python -m black <path-to-file>
```

If you followed the steps above to install the pre-commit hooks, then you can just wait for those hooks to run `ruff` and `black` for you.

## Adding new features

We recommend using GitHub Copilot Agent mode when adding new features,
as this project includes an [AGENTS.md](AGENTS.md) file
that instructs Copilot (and other coding agents) about how to generate code for common code changes.

If you are not using Copilot Agent mode, consult both that file and suggestions below.

### Adding new azd environment variables

When adding new azd environment variables, please remember to update:

1. [main.parameters.json](./infra/main.parameters.json)
1. [appEnvVariables in main.bicep](./infra/main.bicep)
1. [ADO pipeline](.azdo/pipelines/azure-dev.yml).
1. [GitHub workflows](.github/workflows/azure-dev.yml)

### Adding new UI strings

When adding new UI strings, please remember to update all translations.
For any translations that you generate with an AI tool,
please indicate in the PR description which language's strings were AI-generated.

Here are community contributors that can review translations:

| Language | Contributor         |
|----------|---------------------|
| Danish   | @EMjetrot           |
| French   | @manekinekko        |
| Japanese | @bnodir             |
| Norwegian| @jeannotdamoiseaux  |
| Portuguese | @glaucia86        |
| Spanish  | @miguelmsft         |
| Turkish  | @mertcakdogan       |
| Italian  | @ivanvaccarics      |
| Dutch    |                     |
| Polish   | @michuhu            |


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2023 Azure Samples

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<!--
---
name: RAG chat app with your data (Python)
description: Chat with your domain data using Azure OpenAI and Azure AI Search.
languages:
- python
- typescript
- bicep
- azdeveloper
products:
- azure-openai
- azure-cognitive-search
- azure-app-service
- azure
page_type: sample
urlFragment: azure-search-openai-demo
---
-->

# RAG chat app with Azure OpenAI and Azure AI Search (Python)

This solution creates a ChatGPT-like frontend experience over your own documents using RAG (Retrieval Augmented Generation). It uses Azure OpenAI Service to access GPT models, and Azure AI Search for data indexing and retrieval.

This solution's backend is written in Python. There are also [**JavaScript**](https://aka.ms/azai/js/code), [**.NET**](https://aka.ms/azai/net/code), and [**Java**](https://aka.ms/azai/java/code) samples based on this one. Learn more about [developing AI apps using Azure AI Services](https://aka.ms/azai).

[![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=599293758&machine=standardLinux32gb&devcontainer_path=.devcontainer%2Fdevcontainer.json&location=WestUs2)
[![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/azure-search-openai-demo)
[![Open in VS Code for the Web](https://img.shields.io/static/v1?style=for-the-badge&label=VS+Code+for+the+Web&message=Open&color=purple&logo=visualstudiocode)](https://vscode.dev/azure?azdTemplateUrl=https://github.com/azure-samples/azure-search-openai-demo)

## Important Security Notice

This template, the application code and configuration it contains, has been built to showcase Microsoft Azure specific services and tools. We strongly advise our customers not to make this code part of their production environments without implementing or enabling additional security features. See our [productionizing guide](docs/productionizing.md) for tips, and consult the [Azure OpenAI Landing Zone reference architecture](https://techcommunity.microsoft.com/blog/azurearchitectureblog/azure-openai-landing-zone-reference-architecture/3882102) for more best practices.

## Table of Contents

- [Features](#features)
- [Azure account requirements](#azure-account-requirements)
  - [Cost estimation](#cost-estimation)
- [Getting Started](#getting-started)
  - [GitHub Codespaces](#github-codespaces)
  - [VS Code Dev Containers](#vs-code-dev-containers)
  - [Local environment](#local-environment)
- [Deploying](#deploying)
  - [Deploying again](#deploying-again)
- [Running the development server](#running-the-development-server)
- [Using the app](#using-the-app)
- [Clean up](#clean-up)
- [Guidance](#guidance)
  - [Resources](#resources)

![Chat screen](docs/images/chatscreen.png)

[📺 Watch a video overview of the app.](https://youtu.be/3acB0OWmLvM)

This sample demonstrates a few approaches for creating ChatGPT-like experiences over your own data using the Retrieval Augmented Generation pattern. It uses Azure OpenAI Service to access a GPT model (gpt-4.1-mini), and Azure AI Search for data indexing and retrieval.

The repo includes sample data so it's ready to try end to end. In this sample application we use a fictitious company called Zava, and the experience allows its employees to ask questions about the benefits, internal policies, as well as job descriptions and roles.

## Features

- Chat (multi-turn) interface
- Renders citations and thought process for each answer
- Includes settings directly in the UI to tweak the behavior and experiment with options
- Integrates Azure AI Search for indexing and retrieval of documents, with support for [many document formats](/docs/data_ingestion.md#supported-document-formats) as well as [cloud data ingestion](/docs/data_ingestion.md#cloud-data-ingestion)
- Optional usage of [multimodal models](/docs/multimodal.md) to reason over image-heavy documents
- Optional addition of [speech input/output](/docs/deploy_features.md#enabling-speech-inputoutput) for accessibility
- Optional automation of [user login and data access](/docs/login_and_acl.md) via Microsoft Entra
- Performance tracing and monitoring with Application Insights

### Architecture Diagram

![RAG Architecture](docs/images/appcomponents.png)

## Azure account requirements

**IMPORTANT:** In order to deploy and run this example, you'll need:

- **Azure account**. If you're new to Azure, [get an Azure account for free](https://azure.microsoft.com/free/cognitive-search/) and you'll get some free Azure credits to get started. See [guide to deploying with the free trial](docs/deploy_freetrial.md).
- **Azure account permissions**:
  - Your Azure account must have `Microsoft.Authorization/roleAssignments/write` permissions, such as [Role Based Access Control Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview), [User Access Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#user-access-administrator), or [Owner](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#owner). If you don't have subscription-level permissions, you must be granted [RBAC](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview) for an existing resource group and [deploy to that existing group](docs/deploy_existing.md#resource-group).
  - Your Azure account also needs `Microsoft.Resources/deployments/write` permissions on the subscription level.

### Cost estimation

Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage.
However, you can try the [Azure pricing calculator](https://azure.com/e/e3490de2372a4f9b909b0d032560e41b) for the resources below.

- Azure Container Apps: Default host for app deployment as of 10/28/2024. See more details in [the ACA deployment guide](docs/azure_container_apps.md). Consumption plan with 1 CPU core, 2 GB RAM, minimum of 0 replicas. Pricing with Pay-as-You-Go. [Pricing](https://azure.microsoft.com/pricing/details/container-apps/)
- Azure Container Registry: Basic tier. [Pricing](https://azure.microsoft.com/pricing/details/container-registry/)
- Azure App Service: Only provisioned if you deploy to Azure App Service following [the App Service deployment guide](docs/azure_app_service.md).  Basic Tier with 1 CPU core, 1.75 GB RAM. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/app-service/linux/)
- Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/)
- Azure AI Document Intelligence: S0 (Standard) tier using pre-built layout. Pricing per document page, sample documents have 261 pages total. [Pricing](https://azure.microsoft.com/pricing/details/form-recognizer/)
- Azure AI Search: Basic tier, 1 replica, free level of semantic search. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/search/)
- Azure Blob Storage: Standard tier with ZRS (Zone-redundant storage). Pricing per storage and read operations. [Pricing](https://azure.microsoft.com/pricing/details/storage/blobs/)
- Azure Cosmos DB: Only provisioned if you enabled [chat history with Cosmos DB](docs/deploy_features.md#enabling-persistent-chat-history-with-azure-cosmos-db). Serverless tier. Pricing per request unit and storage. [Pricing](https://azure.microsoft.com/pricing/details/cosmos-db/)
- Azure AI Vision: Only provisioned if you enabled [multimodal approach](docs/multimodal.md). Pricing per 1K transactions. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/computer-vision/)
- Azure AI Content Understanding: Only provisioned if you enabled [media description](docs/deploy_features.md#enabling-media-description-with-azure-content-understanding). Pricing per 1K images. [Pricing](https://azure.microsoft.com/pricing/details/content-understanding/)
- Azure Monitor: Pay-as-you-go tier. Costs based on data ingested. [Pricing](https://azure.microsoft.com/pricing/details/monitor/)

To reduce costs, you can switch to free SKUs for various services, but those SKUs have limitations.
See this guide on [deploying with minimal costs](docs/deploy_lowcost.md) for more details.

⚠️ To avoid unnecessary costs, remember to take down your app if it's no longer in use,
either by deleting the resource group in the Portal or running `azd down`.

## Getting Started

You have a few options for setting up this project.
The easiest way to get started is GitHub Codespaces, since it will set up all the tools for you,
but you can also [set it up locally](#local-environment) if desired.

### GitHub Codespaces

You can run this repo virtually by using GitHub Codespaces, which will open a web-based VS Code in your browser:

[![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=599293758&machine=standardLinux32gb&devcontainer_path=.devcontainer%2Fdevcontainer.json&location=WestUs2)

Once the codespace opens (this may take several minutes), open a terminal window.

### VS Code Dev Containers

A related option is VS Code Dev Containers, which will open the project in your local VS Code using the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers):

1. Start Docker Desktop (install it if not already installed)
2. Open the project:
    [![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/azure-search-openai-demo)

3. In the VS Code window that opens, once the project files show up (this may take several minutes), open a terminal window.

### Local environment

1. Install the required tools:

    - [Azure Developer CLI](https://aka.ms/azure-dev/install)
    - [Python 3.10, 3.11, 3.12, 3.13, or 3.14](https://www.python.org/downloads/)
      - **Important**: Python and the pip package manager must be in the path in Windows for the setup scripts to work.
      - **Important**: Ensure you can run `python --version` from console. On Ubuntu, you might need to run `sudo apt install python-is-python3` to link `python` to `python3`.
    - [Node.js 20+](https://nodejs.org/download/)
    - [Git](https://git-scm.com/downloads)
    - [PowerShell 7+ (pwsh)](https://github.com/powershell/powershell) - For Windows users only.
      - **Important**: Ensure you can run `pwsh.exe` from a PowerShell terminal. If this fails, you likely need to upgrade PowerShell.

2. Create a new folder and switch to it in the terminal.
3. Run this command to download the project code:

    ```shell
    azd init -t azure-search-openai-demo
    ```

    Note that this command will initialize a git repository, so you do not need to clone this repository.

## Deploying

The steps below will provision Azure resources and deploy the application code to Azure Container Apps. To deploy to Azure App Service instead, follow [the app service deployment guide](docs/azure_app_service.md).

1. Login to your Azure account:

    ```shell
    azd auth login
    ```

    For GitHub Codespaces users, if the previous command fails, try:

    ```shell
    azd auth login --use-device-code
    ```

1. Create a new azd environment:

    ```shell
    azd env new
    ```

    Enter a name that will be used for the resource group.
    This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward.
1. (Optional) This is the point where you can customize the deployment by setting environment variables, in order to [use existing resources](docs/deploy_existing.md), [enable optional features (such as auth or vision)](docs/deploy_features.md), or [deploy low-cost options](docs/deploy_lowcost.md), or [deploy with the Azure free trial](docs/deploy_freetrial.md).
1. Run `azd up` - This will provision Azure resources and deploy this sample to those resources, including building the search index based on the files found in the `./data` folder.
    - **Important**: Beware that the resources created by this command will incur immediate costs, primarily from the AI Search resource. These resources may accrue costs even if you interrupt the command before it is fully executed. You can run `azd down` or delete the resources manually to avoid unnecessary spending.
    - You will be prompted to select two locations, one for the majority of resources and one for the OpenAI resource, which is currently a short list. That location list is based on the [OpenAI model availability table](https://learn.microsoft.com/azure/cognitive-services/openai/concepts/models#model-summary-table-and-region-availability) and may become outdated as availability changes.
1. After the application has been successfully deployed you will see a URL printed to the console.  Click that URL to interact with the application in your browser.
It will look like the following:

!['Output from running azd up'](docs/images/endpoint.png)

> NOTE: It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page.

### Deploying again

If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. You can just run:

```shell
azd deploy
```

If you've changed the infrastructure files (`infra` folder or `azure.yaml`), then you'll need to re-provision the Azure resources. You can do that by running:

```shell
azd up
```

## Running the development server

You can only run a development server locally **after** having successfully run the `azd up` command. If you haven't yet, follow the [deploying](#deploying) steps above.

1. Run `azd auth login` if you have not logged in recently.
2. Start the server:

  Windows:

  ```shell
  ./app/start.ps1
  ```

  Linux/Mac:

  ```shell
  ./app/start.sh
  ```

  VS Code: Run the "VS Code Task: Start App" task.

It's also possible to enable hotloading or the VS Code debugger.
See more tips in [the local development guide](docs/localdev.md).

## Using the app

- In Azure: navigate to the Azure WebApp deployed by azd. The URL is printed out when azd completes (as "Endpoint"), or you can find it in the Azure portal.
- Running locally: navigate to 127.0.0.1:50505

Once in the web app:

- Try different topics in chat. Try follow up questions, clarifications, ask to simplify or elaborate on answer, etc.
- Explore citations and sources
- Click on "settings" to try different options, tweak prompts, etc.

## Clean up

To clean up all the resources created by this sample:

1. Run `azd down`
2. When asked if you are sure you want to continue, enter `y`
3. When asked if you want to permanently delete the resources, enter `y`

The resource group and all the resources will be deleted.

## Guidance

You can find extensive documentation in the [docs](docs/README.md) folder:

- Deploying:
  - [Troubleshooting deployment](docs/deploy_troubleshooting.md)
    - [Debugging the app on App Service](docs/appservice.md)
  - [Deploying with azd: deep dive and CI/CD](docs/azd.md)
  - [Deploying with existing Azure resources](docs/deploy_existing.md)
  - [Deploying from a free account](docs/deploy_lowcost.md)
  - [Enabling optional features](docs/deploy_features.md)
    - [All features](docs/deploy_features.md)
    - [Login and access control](docs/login_and_acl.md)
    - [Multimodal](docs/multimodal.md)
    - [Reasoning](docs/reasoning.md)
    - [Private endpoints](docs/deploy_private.md)
    - [Agentic retrieval](docs/agentic_retrieval.md)
  - [Sharing deployment environments](docs/sharing_environments.md)
- [Local development](docs/localdev.md)
- [Customizing the app](docs/customization.md)
- [App architecture](docs/architecture.md)
- [HTTP Protocol](docs/http_protocol.md)
- [Data ingestion](docs/data_ingestion.md)
- [Evaluation](docs/evaluation.md)
- [Safety evaluation](docs/safety_evaluation.md)
- [Monitoring with Application Insights](docs/monitoring.md)
- [Productionizing](docs/productionizing.md)
- [Alternative RAG chat samples](docs/other_samples.md)

### Resources

- [📖 Docs: Get started using the chat with your data sample](https://learn.microsoft.com/azure/developer/python/get-started-app-chat-template?toc=%2Fazure%2Fdeveloper%2Fai%2Ftoc.json&bc=%2Fazure%2Fdeveloper%2Fai%2Fbreadcrumb%2Ftoc.json&tabs=github-codespaces)
- [📖 Blog: Revolutionize your Enterprise Data with ChatGPT: Next-gen Apps w/ Azure OpenAI and AI Search](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/revolutionize-your-enterprise-data-with-chatgpt-next-gen-apps-w-azure-openai-and/3762087)
- [📖 Docs: Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search)
- [📖 Docs: Azure OpenAI Service](https://learn.microsoft.com/azure/cognitive-services/openai/overview)
- [📖 Docs: Comparing Azure OpenAI and OpenAI](https://learn.microsoft.com/azure/cognitive-services/openai/overview#comparing-azure-openai-and-openai/)
- [📖 Blog: Access Control in Generative AI applications with Azure AI Search](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/access-control-in-generative-ai-applications-with-azure-ai-search/3956408)
- [📺 Talk: Quickly build and deploy OpenAI apps on Azure, infused with your own data](https://www.youtube.com/watch?v=j8i-OM5kwiY)
- [📺 Video: RAG Deep Dive Series](https://techcommunity.microsoft.com/blog/azuredevcommunityblog/rag-deep-dive-watch-all-the-recordings/4383171)

### Getting help

This is a sample built to demonstrate the capabilities of modern Generative AI apps and how they can be built in Azure.
For help with deploying this sample, please post in [GitHub Issues](https://github.com/Azure-Samples/azure-search-openai-demo/issues). If you're a Microsoft employee, you can also post in [our Teams channel](https://aka.ms/azai-python-help).

This repository is supported by the maintainers, _not_ by Microsoft Support,
so please use the support mechanisms described above, and we will do our best to help you out.

For general questions about developing AI solutions on Azure,
join the Azure AI Foundry Developer Community:

[![Azure AI Foundry Discord](https://img.shields.io/badge/Discord-Azure_AI_Foundry_Community_Discord-blue?style=for-the-badge&logo=discord&color=5865f2&logoColor=fff)](https://aka.ms/foundry/discord)
[![Azure AI Foundry Developer Forum](https://img.shields.io/badge/GitHub-Azure_AI_Foundry_Developer_Forum-blue?style=for-the-badge&logo=github&color=000000&logoColor=fff)](https://aka.ms/foundry/forum)

### Note

>Note: The PDF documents used in this demo contain information generated using a language model (Azure OpenAI Service). The information contained in these documents is only for demonstration purposes and does not reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, about the completeness, accuracy, reliability, suitability or availability with respect to the information contained in this document. All rights reserved to Microsoft.


================================================
FILE: SECURITY.md
================================================
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->
# Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](<https://docs.microsoft.com/previous-versions/tn-archive/cc751383(v=technet.10)>), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/msrc/pgp-key-msrc).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

- Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
- Full paths of source file(s) related to the manifestation of the issue
- The location of the affected source code (tag/branch/commit or direct URL)
- Any special configuration required to reproduce the issue
- Step-by-step instructions to reproduce the issue
- Proof-of-concept or exploit code (if possible)
- Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/msrc/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->


================================================
FILE: app/backend/.dockerignore
================================================
.git
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env


================================================
FILE: app/backend/Dockerfile
================================================
# Backend image: installs the Python dependencies and serves main:app with gunicorn on port 8000.
FROM python:3.13-bookworm

WORKDIR /app

# Copy only the dependency manifest first so the two install layers below are
# reused from the build cache until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN python -m pip install --no-cache-dir -r requirements.txt

RUN python -m pip install --no-cache-dir gunicorn

# Copy the rest of the build context (filtered by .dockerignore) last, so source
# edits do not invalidate the dependency layers above.
COPY ./ /app

CMD ["python3", "-m", "gunicorn", "-b", "0.0.0.0:8000", "main:app"]


================================================
FILE: app/backend/app.py
================================================
import dataclasses
import io
import json
import logging
import mimetypes
import os
import time
from collections.abc import AsyncGenerator, Awaitable, Callable
from pathlib import Path
from typing import Any, cast

from azure.cognitiveservices.speech import (
    ResultReason,
    SpeechConfig,
    SpeechSynthesisOutputFormat,
    SpeechSynthesisResult,
    SpeechSynthesizer,
)
from azure.identity.aio import (
    AzureDeveloperCliCredential,
    ManagedIdentityCredential,
    get_bearer_token_provider,
)
from azure.monitor.opentelemetry import configure_azure_monitor
from azure.search.documents.aio import SearchClient
from azure.search.documents.indexes.aio import SearchIndexClient
from azure.search.documents.knowledgebases.aio import KnowledgeBaseRetrievalClient
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
from opentelemetry.instrumentation.httpx import (
    HTTPXClientInstrumentor,
)
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
from quart import (
    Blueprint,
    Quart,
    abort,
    current_app,
    jsonify,
    make_response,
    request,
    send_file,
    send_from_directory,
)
from quart_cors import cors

from approaches.approach import Approach, DataPoints
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
from approaches.promptmanager import PromptManager
from chat_history.cosmosdb import chat_history_cosmosdb_bp
from config import (
    CONFIG_AGENTIC_KNOWLEDGEBASE_ENABLED,
    CONFIG_AUTH_CLIENT,
    CONFIG_CHAT_APPROACH,
    CONFIG_CHAT_HISTORY_BROWSER_ENABLED,
    CONFIG_CHAT_HISTORY_COSMOS_ENABLED,
    CONFIG_CREDENTIAL,
    CONFIG_DEFAULT_REASONING_EFFORT,
    CONFIG_DEFAULT_RETRIEVAL_REASONING_EFFORT,
    CONFIG_GLOBAL_BLOB_MANAGER,
    CONFIG_INGESTER,
    CONFIG_KNOWLEDGEBASE_CLIENT,
    CONFIG_KNOWLEDGEBASE_CLIENT_WITH_SHAREPOINT,
    CONFIG_KNOWLEDGEBASE_CLIENT_WITH_WEB,
    CONFIG_KNOWLEDGEBASE_CLIENT_WITH_WEB_AND_SHAREPOINT,
    CONFIG_LANGUAGE_PICKER_ENABLED,
    CONFIG_MULTIMODAL_ENABLED,
    CONFIG_OPENAI_CLIENT,
    CONFIG_QUERY_REWRITING_ENABLED,
    CONFIG_RAG_SEARCH_IMAGE_EMBEDDINGS,
    CONFIG_RAG_SEARCH_TEXT_EMBEDDINGS,
    CONFIG_RAG_SEND_IMAGE_SOURCES,
    CONFIG_RAG_SEND_TEXT_SOURCES,
    CONFIG_REASONING_EFFORT_ENABLED,
    CONFIG_SEARCH_CLIENT,
    CONFIG_SEMANTIC_RANKER_DEPLOYED,
    CONFIG_SHAREPOINT_SOURCE_ENABLED,
    CONFIG_SPEECH_INPUT_ENABLED,
    CONFIG_SPEECH_OUTPUT_AZURE_ENABLED,
    CONFIG_SPEECH_OUTPUT_BROWSER_ENABLED,
    CONFIG_SPEECH_SERVICE_ID,
    CONFIG_SPEECH_SERVICE_LOCATION,
    CONFIG_SPEECH_SERVICE_TOKEN,
    CONFIG_SPEECH_SERVICE_VOICE,
    CONFIG_STREAMING_ENABLED,
    CONFIG_USER_BLOB_MANAGER,
    CONFIG_USER_UPLOAD_ENABLED,
    CONFIG_VECTOR_SEARCH_ENABLED,
    CONFIG_WEB_SOURCE_ENABLED,
)
from core.authentication import AuthenticationHelper
from core.sessionhelper import create_session_id
from decorators import authenticated, authenticated_path
from error import error_dict, error_response
from prepdocs import (
    OpenAIHost,
    setup_embeddings_service,
    setup_file_processors,
    setup_image_embeddings_service,
    setup_openai_client,
    setup_search_info,
)
from prepdocslib.blobmanager import AdlsBlobManager, BlobManager
from prepdocslib.embeddings import ImageEmbeddings
from prepdocslib.filestrategy import UploadUserFileStrategy
from prepdocslib.listfilestrategy import File

# Blueprint that every route in this module is registered on; its static folder is "static".
bp = Blueprint("routes", __name__, static_folder="static")
# Fix Windows registry issue with mimetypes: register the .js and .css types explicitly
# instead of relying on whatever the host system reports for them.
mimetypes.add_type(type="application/javascript", ext=".js")
mimetypes.add_type(type="text/css", ext=".css")


@bp.route("/")
async def index():
    """Serve index.html from the blueprint's static folder as the root page."""
    landing_page = await bp.send_static_file("index.html")
    return landing_page


# Empty page is recommended for login redirect to work.
# See https://github.com/AzureAD/microsoft-authentication-library-for-js/blob/dev/lib/msal-browser/docs/initialization.md#redirecturi-considerations for more information
@bp.route("/redirect")
async def redirect():
    """Respond with an empty body; this route exists only as a blank target for the login redirect."""
    empty_page = ""
    return empty_page


@bp.route("/favicon.ico")
async def favicon():
    """Serve favicon.ico from the blueprint's static folder."""
    icon_response = await bp.send_static_file("favicon.ico")
    return icon_response


@bp.route("/assets/<path:path>")
async def assets(path):
    """Serve a file from the static/assets directory that sits next to this module.

    Args:
        path: Path of the requested asset relative to static/assets (may contain slashes).
    """
    module_dir = Path(__file__).resolve().parent
    assets_dir = module_dir / "static" / "assets"
    return await send_from_directory(assets_dir, path)


@bp.route("/content/<path>")
@authenticated_path
async def content_file(path: str, auth_claims: dict[str, Any]):
    """
    Serve content files from blob storage from within the app to keep the example self-contained.
    *** NOTE *** if you are using app services authentication, this route will return unauthorized to all users that are not logged in
    if AZURE_ENFORCE_ACCESS_CONTROL is not set or false, logged in users can access all files regardless of access control
    if AZURE_ENFORCE_ACCESS_CONTROL is set to true, logged in users can only access files they have access to
    This is also slow and memory hungry.

    Lookup order: the global blob manager first, then (only when user uploads are enabled) the
    requesting user's own storage, keyed by the "oid" claim.

    Args:
        path: Name of the requested file, optionally followed by a "#page=N" fragment.
        auth_claims: Claims of the requesting user; "oid" is read when falling back to user storage.

    Returns:
        The file content with its stored MIME type (guessed from the file name when the stored
        type is the generic "application/octet-stream"). Aborts with 404 when the file is found in
        neither location or the blob carries no content settings.
    """
    # Strip a "#page=N" suffix, e.g. "report.pdf#page=3" -> "report.pdf".
    # This shouldn't typically be necessary as browsers don't send hash fragments to servers
    if path.find("#page=") > 0:
        path_parts = path.rsplit("#page=", 1)
        path = path_parts[0]
    current_app.logger.info("Opening file %s", path)
    blob_manager: BlobManager = current_app.config[CONFIG_GLOBAL_BLOB_MANAGER]

    # Get bytes and properties from the blob manager
    result = await blob_manager.download_blob(path)

    if result is None:
        current_app.logger.info("Path not found in general Blob container: %s", path)
        if current_app.config[CONFIG_USER_UPLOAD_ENABLED]:
            user_oid = auth_claims["oid"]
            user_blob_manager: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
            result = await user_blob_manager.download_blob(path, user_oid=user_oid)
            if result is None:
                # No exception is active here, so logger.exception() would append a
                # meaningless "NoneType: None" traceback; log a plain warning instead.
                current_app.logger.warning("Path not found in DataLake: %s", path)

    if not result:
        abort(404)

    content, properties = result

    if not properties or "content_settings" not in properties:
        abort(404)

    mime_type = properties["content_settings"]["content_type"]
    if mime_type == "application/octet-stream":
        # Generic stored type: fall back to guessing from the file extension
        mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"

    # Create a BytesIO object from the bytes
    blob_file = io.BytesIO(content)
    return await send_file(blob_file, mimetype=mime_type, as_attachment=False, attachment_filename=path)


class JSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes dataclass instances.

    - A ``DataPoints`` instance becomes a dict containing only its non-None fields.
    - Any other dataclass instance becomes ``dataclasses.asdict(o)``; if that dict has a
      ``data_points`` dict whose ``citation_activity_details`` is None or absent, that key is
      omitted from the output.
    - Everything else is deferred to ``json.JSONEncoder.default`` (which raises ``TypeError``).
    """

    def default(self, o):
        # is_dataclass() is also true for dataclass *types*; asdict() only accepts instances.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            as_dict = dataclasses.asdict(o)
            if isinstance(o, DataPoints):
                # Drop optional data point collections that are not populated to keep API surface stable
                return {k: v for k, v in as_dict.items() if v is not None}
            data_points_payload = as_dict.get("data_points")
            if isinstance(data_points_payload, dict) and data_points_payload.get("citation_activity_details") is None:
                # .get() returns None both for an explicit None and for a missing key, so pop
                # with a default to avoid a KeyError when the key is not there at all.
                data_points_payload.pop("citation_activity_details", None)
            return as_dict
        return super().default(o)


async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]:
    """Turn a stream of event dicts into newline-delimited JSON strings.

    Each event is serialized with the module's JSONEncoder (non-ASCII characters kept as-is) and
    terminated with a newline. If iterating or serializing raises, the exception is logged and one
    final line built from ``error_dict(error)`` is yielded (without a trailing newline), after
    which the stream ends.
    """
    try:
        async for chunk in r:
            serialized = json.dumps(chunk, ensure_ascii=False, cls=JSONEncoder)
            yield f"{serialized}\n"
    except Exception as error:
        logging.exception("Exception while generating response stream: %s", error)
        failure_payload = error_dict(error)
        yield json.dumps(failure_payload)


@bp.route("/chat", methods=["POST"])
@authenticated
async def chat(auth_claims: dict[str, Any]):
    """Run the configured chat approach on the posted messages and return the full result as JSON.

    The request body must be JSON (otherwise 415). Its optional "context" object is extended with
    the caller's auth claims, and its "messages" list is handed to the approach. Any exception
    raised while running the approach is converted to a response by ``error_response``.

    Args:
        auth_claims: Claims of the requesting user, stored under context["auth_claims"].
    """
    if not request.is_json:
        return jsonify({"error": "request must be json"}), 415
    payload = await request.get_json()
    approach_context = payload.get("context", {})
    approach_context["auth_claims"] = auth_claims
    try:
        chat_approach: Approach = cast(Approach, current_app.config[CONFIG_CHAT_APPROACH])

        # Reuse the session state sent by the client; otherwise create a new session id
        # based on which chat history options are enabled.
        session_state = payload.get("session_state")
        if session_state is None:
            cosmos_history_enabled = current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED]
            browser_history_enabled = current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED]
            session_state = create_session_id(cosmos_history_enabled, browser_history_enabled)

        answer = await chat_approach.run(
            payload["messages"],
            context=approach_context,
            session_state=session_state,
        )
        return jsonify(answer)
    except Exception as error:
        return error_response(error, "/chat")


@bp.route("/chat/stream", methods=["POST"])
@authenticated
async def chat_stream(auth_claims: dict[str, Any]):
    """Run the configured chat approach in streaming mode and return the events as NDJSON.

    The request body must be JSON (otherwise 415). Its optional "context" object is extended with
    the caller's auth claims, and its "messages" list is handed to the approach's ``run_stream``.
    The resulting event stream is wrapped by ``format_as_ndjson`` and sent with the
    "application/json-lines" MIME type. Exceptions raised before the response object is created are
    converted to a response by ``error_response``.

    Args:
        auth_claims: Claims of the requesting user, stored under context["auth_claims"].
    """
    if not request.is_json:
        return jsonify({"error": "request must be json"}), 415
    request_json = await request.get_json()
    context = request_json.get("context", {})
    context["auth_claims"] = auth_claims
    try:
        approach: Approach = cast(Approach, current_app.config[CONFIG_CHAT_APPROACH])

        # If session state is provided, persists the session state,
        # else creates a new session_id depending on the chat history options enabled.
        session_state = request_json.get("session_state")
        if session_state is None:
            session_state = create_session_id(
                current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED],
                current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED],
            )
        result = await approach.run_stream(
            request_json["messages"],
            context=context,
            session_state=session_state,
        )
        response = await make_response(format_as_ndjson(result))
        # NOTE(review): presumably disables the response timeout so long streams are not cut off — confirm
        response.timeout = None  # type: ignore
        response.mimetype = "application/json-lines"
        return response
    except Exception as error:
        # Report this handler's own route (it was previously labelled "/chat" by copy-paste)
        return error_response(error, "/chat/stream")


# Send MSAL.js settings to the client UI
@bp.route("/auth_setup", methods=["GET"])
def auth_setup():
    """Return, as JSON, whatever the configured auth helper reports as the client's auth setup."""
    client_auth_settings = current_app.config[CONFIG_AUTH_CLIENT].get_auth_setup_for_client()
    return jsonify(client_auth_settings)


@bp.route("/config", methods=["GET"])
def config():
    """Return the app-config values listed below as a JSON object.

    Every entry is required: a key missing from the app config raises ``KeyError``.
    """
    # (JSON key in the response, app-config key the value is read from)
    exposed_settings = (
        ("showMultimodalOptions", CONFIG_MULTIMODAL_ENABLED),
        ("showSemanticRankerOption", CONFIG_SEMANTIC_RANKER_DEPLOYED),
        ("showQueryRewritingOption", CONFIG_QUERY_REWRITING_ENABLED),
        ("showReasoningEffortOption", CONFIG_REASONING_EFFORT_ENABLED),
        ("streamingEnabled", CONFIG_STREAMING_ENABLED),
        ("defaultReasoningEffort", CONFIG_DEFAULT_REASONING_EFFORT),
        ("defaultRetrievalReasoningEffort", CONFIG_DEFAULT_RETRIEVAL_REASONING_EFFORT),
        ("showVectorOption", CONFIG_VECTOR_SEARCH_ENABLED),
        ("showUserUpload", CONFIG_USER_UPLOAD_ENABLED),
        ("showLanguagePicker", CONFIG_LANGUAGE_PICKER_ENABLED),
        ("showSpeechInput", CONFIG_SPEECH_INPUT_ENABLED),
        ("showSpeechOutputBrowser", CONFIG_SPEECH_OUTPUT_BROWSER_ENABLED),
        ("showSpeechOutputAzure", CONFIG_SPEECH_OUTPUT_AZURE_ENABLED),
        ("showChatHistoryBrowser", CONFIG_CHAT_HISTORY_BROWSER_ENABLED),
        ("showChatHistoryCosmos", CONFIG_CHAT_HISTORY_COSMOS_ENABLED),
        ("showAgenticRetrievalOption", CONFIG_AGENTIC_KNOWLEDGEBASE_ENABLED),
        ("ragSearchTextEmbeddings", CONFIG_RAG_SEARCH_TEXT_EMBEDDINGS),
        ("ragSearchImageEmbeddings", CONFIG_RAG_SEARCH_IMAGE_EMBEDDINGS),
        ("ragSendTextSources", CONFIG_RAG_SEND_TEXT_SOURCES),
        ("ragSendImageSources", CONFIG_RAG_SEND_IMAGE_SOURCES),
        ("webSourceEnabled", CONFIG_WEB_SOURCE_ENABLED),
        ("sharepointSourceEnabled", CONFIG_SHAREPOINT_SOURCE_ENABLED),
    )
    app_settings = current_app.config
    return jsonify({json_key: app_settings[config_key] for json_key, config_key in exposed_settings})


@bp.route("/speech", methods=["POST"])
async def speech():
    """Synthesize the posted "text" to MP3 audio with the Azure speech SDK.

    The request body must be JSON (otherwise 415). A token for the
    "https://cognitiveservices.azure.com/.default" scope is cached in the app config and fetched
    again when none is cached or the cached one expires within the next 60 seconds.

    Returns:
        The audio bytes with Content-Type "audio/mp3" on success; a JSON ``{"error": ...}`` body
        with status 500 when synthesis is canceled, ends with any other reason, or raises.
    """
    if not request.is_json:
        return jsonify({"error": "request must be json"}), 415

    cached_token = current_app.config.get(CONFIG_SPEECH_SERVICE_TOKEN)
    token_is_stale = cached_token is None or cached_token.expires_on < time.time() + 60
    if token_is_stale:
        credential = current_app.config[CONFIG_CREDENTIAL]
        cached_token = await credential.get_token("https://cognitiveservices.azure.com/.default")
        current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = cached_token

    payload = await request.get_json()
    text = payload["text"]
    try:
        # Construct a token as described in documentation:
        # https://learn.microsoft.com/azure/ai-services/speech-service/how-to-configure-azure-ad-auth?pivots=programming-language-python
        resource_id = current_app.config[CONFIG_SPEECH_SERVICE_ID]
        bearer = current_app.config[CONFIG_SPEECH_SERVICE_TOKEN].token
        auth_token = "aad#" + resource_id + "#" + bearer

        synthesis_config = SpeechConfig(
            auth_token=auth_token, region=current_app.config[CONFIG_SPEECH_SERVICE_LOCATION]
        )
        synthesis_config.speech_synthesis_voice_name = current_app.config[CONFIG_SPEECH_SERVICE_VOICE]
        synthesis_config.set_speech_synthesis_output_format(SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
        synthesizer = SpeechSynthesizer(speech_config=synthesis_config, audio_config=None)
        result: SpeechSynthesisResult = synthesizer.speak_text_async(text).get()

        if result.reason == ResultReason.SynthesizingAudioCompleted:
            return result.audio_data, 200, {"Content-Type": "audio/mp3"}

        if result.reason == ResultReason.Canceled:
            details = result.cancellation_details
            current_app.logger.error("Speech synthesis canceled: %s %s", details.reason, details.error_details)
            raise Exception("Speech synthesis canceled. Check logs for details.")

        current_app.logger.error("Unexpected result reason: %s", result.reason)
        raise Exception("Speech synthesis failed. Check logs for details.")
    except Exception as e:
        current_app.logger.exception("Exception in /speech")
        return jsonify({"error": str(e)}), 500


@bp.post("/upload")
@authenticated
async def upload(auth_claims: dict[str, Any]):
    """Store the first uploaded "file" part in the user's storage and hand it to the ingester.

    Args:
        auth_claims: Claims of the requesting user; "oid" identifies the owner and is set as the
            file's only entry under acls["oids"].

    Returns:
        400 when the request has no "file" part, 200 on success, and 500 with a generic message
        when storing or ingesting raises (the error itself is only logged).
    """
    uploaded_parts = await request.files
    if "file" not in uploaded_parts:
        return jsonify({"message": "No file part in the request", "status": "failed"}), 400

    try:
        user_oid = auth_claims["oid"]
        # Only the first part named "file" is processed
        first_file = uploaded_parts.getlist("file")[0]
        user_storage: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
        stored_url = await user_storage.upload_blob(first_file, first_file.filename, user_oid)
        ingester: UploadUserFileStrategy = current_app.config[CONFIG_INGESTER]
        document = File(content=first_file, url=stored_url, acls={"oids": [user_oid]})
        await ingester.add_file(document, user_oid=user_oid)
        return jsonify({"message": "File uploaded successfully"}), 200
    except Exception as error:
        current_app.logger.error("Error uploading file: %s", error)
        return jsonify({"message": "Error uploading file, check server logs for details.", "status": "failed"}), 500


@bp.post("/delete_uploaded")
@authenticated
async def delete_uploaded(auth_claims: dict[str, Any]):
    """Delete one of the requesting user's uploaded files from storage and from the ingester.

    Expects a JSON body of the form ``{"filename": "<name>"}``.

    Args:
        auth_claims: Claims of the requesting user; "oid" identifies whose file is removed.

    Returns:
        415 when the body is not JSON, 400 when no filename is given, otherwise 200 with a
        confirmation message once both removals have completed.
    """
    # Same guard as the other JSON endpoints in this module; without it a non-JSON
    # body would fail below with an AttributeError on None.
    if not request.is_json:
        return jsonify({"error": "request must be json"}), 415
    request_json = await request.get_json()
    filename = request_json.get("filename") if isinstance(request_json, dict) else None
    if not filename:
        # Never forward a missing/empty filename to the delete calls
        return jsonify({"message": "No filename provided in the request", "status": "failed"}), 400
    user_oid = auth_claims["oid"]
    adls_manager: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
    await adls_manager.remove_blob(filename, user_oid)
    ingester: UploadUserFileStrategy = current_app.config[CONFIG_INGESTER]
    await ingester.remove_file(filename, user_oid)
    return jsonify({"message": f"File {filename} deleted successfully"}), 200


@bp.get("/list_uploaded")
@authenticated
async def list_uploaded(auth_claims: dict[str, Any]):
    """Return the current user's uploaded documents as a JSON list.

    The listing itself is delegated to the user blob manager's ``list_blobs``. Per the original
    author's note it covers only files directly in the user's directory (no subdirectories) and
    leaves out image files and the images directory.
    """
    owner_oid = auth_claims["oid"]
    user_storage: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
    return jsonify(await user_storage.list_blobs(owner_oid)), 200


@bp.before_app_serving
async def setup_clients():
    # Replace these with your own values, either in environment variables or directly here
    AZURE_STORAGE_ACCOUNT = os.environ["AZURE_STORAGE_ACCOUNT"]
    AZURE_STORAGE_CONTAINER = os.environ["AZURE_STORAGE_CONTAINER"]
    AZURE_IMAGESTORAGE_CONTAINER = os.environ.get("AZURE_IMAGESTORAGE_CONTAINER")
    AZURE_USERSTORAGE_ACCOUNT = os.environ.get("AZURE_USERSTORAGE_ACCOUNT")
    AZURE_USERSTORAGE_CONTAINER = os.environ.get("AZURE_USERSTORAGE_CONTAINER")
    AZURE_SEARCH_SERVICE = os.environ["AZURE_SEARCH_SERVICE"]
    AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
    AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"]
    AZURE_SEARCH_KNOWLEDGEBASE_NAME = os.getenv("AZURE_SEARCH_KNOWLEDGEBASE_NAME", "")
    # Shared by all OpenAI deployments
    OPENAI_HOST = OpenAIHost(os.getenv("OPENAI_HOST", "azure"))
    OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
    AZURE_OPENAI_KNOWLEDGEBASE_MODEL = os.getenv("AZURE_OPENAI_KNOWLEDGEBASE_MODEL")
    AZURE_OPENAI_KNOWLEDGEBASE_DEPLOYMENT = os.getenv("AZURE_OPENAI_KNOWLEDGEBASE_DEPLOYMENT")
    OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
    OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536)
    OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT")
    # Used with Azure OpenAI deployments
    AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
    AZURE_OPENAI_CHATGPT_DEPLOYMENT = (
        os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT")
        if OPENAI_HOST in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]
        else None
    )
    AZURE_OPENAI_EMB_DEPLOYMENT = (
        os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM] else None
    )
    AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL")
    AZURE_VISION_ENDPOINT = os.getenv("AZURE_VISION_ENDPOINT", "")
    AZURE_OPENAI_API_KEY_OVERRIDE = os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE")
    # Used only with non-Azure OpenAI deployments
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    OPENAI_ORGANIZATION = os.getenv("OPENAI_ORGANIZATION")

    AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
    AZURE_USE_AUTHENTICATION = os.getenv("AZURE_USE_AUTHENTICATION", "").lower() == "true"
    AZURE_ENFORCE_ACCESS_CONTROL = os.getenv("AZURE_ENFORCE_ACCESS_CONTROL", "").lower() == "true"
    AZURE_ENABLE_UNAUTHENTICATED_ACCESS = os.getenv("AZURE_ENABLE_UNAUTHENTICATED_ACCESS", "").lower() == "true"
    AZURE_SERVER_APP_ID = os.getenv("AZURE_SERVER_APP_ID")
    AZURE_SERVER_APP_SECRET = os.getenv("AZURE_SERVER_APP_SECRET")
    AZURE_CLIENT_APP_ID = os.getenv("AZURE_CLIENT_APP_ID")
    AZURE_AUTH_TENANT_ID = os.getenv("AZURE_AUTH_TENANT_ID", AZURE_TENANT_ID)

    KB_FIELDS_CONTENT = os.getenv("KB_FIELDS_CONTENT", "content")
    KB_FIELDS_SOURCEPAGE = os.getenv("KB_FIELDS_SOURCEPAGE", "sourcepage")

    AZURE_SEARCH_QUERY_LANGUAGE = os.getenv("AZURE_SEARCH_QUERY_LANGUAGE") or "en-us"
    AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER") or "lexicon"
    AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()
    AZURE_SEARCH_QUERY_REWRITING = os.getenv("AZURE_SEARCH_QUERY_REWRITING", "false").lower()
    # This defaults to the previous field name "embedding", for backwards compatibility
    AZURE_SEARCH_FIELD_NAME_EMBEDDING = os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING", "embedding")

    AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
    AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
    AZURE_SPEECH_SERVICE_VOICE = os.getenv("AZURE_SPEECH_SERVICE_VOICE") or "en-US-AndrewMultilingualNeural"

    USE_MULTIMODAL = os.getenv("USE_MULTIMODAL", "").lower() == "true"
    RAG_SEARCH_TEXT_EMBEDDINGS = os.getenv("RAG_SEARCH_TEXT_EMBEDDINGS", "true").lower() == "true"
    RAG_SEARCH_IMAGE_EMBEDDINGS = os.getenv("RAG_SEARCH_IMAGE_EMBEDDINGS", "true").lower() == "true"
    RAG_SEND_TEXT_SOURCES = os.getenv("RAG_SEND_TEXT_SOURCES", "true").lower() == "true"
    RAG_SEND_IMAGE_SOURCES = os.getenv("RAG_SEND_IMAGE_SOURCES", "true").lower() == "true"
    USE_USER_UPLOAD = os.getenv("USE_USER_UPLOAD", "").lower() == "true"
    ENABLE_LANGUAGE_PICKER = os.getenv("ENABLE_LANGUAGE_PICKER", "").lower() == "true"
    USE_SPEECH_INPUT_BROWSER = os.getenv("USE_SPEECH_INPUT_BROWSER", "").lower() == "true"
    USE_SPEECH_OUTPUT_BROWSER = os.getenv("USE_SPEECH_OUTPUT_BROWSER", "").lower() == "true"
    USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true"
    USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
    USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
    USE_AGENTIC_KNOWLEDGEBASE = os.getenv("USE_AGENTIC_KNOWLEDGEBASE", "").lower() == "true"
    USE_WEB_SOURCE = os.getenv("USE_WEB_SOURCE", "").lower() == "true"
    USE_SHAREPOINT_SOURCE = os.getenv("USE_SHAREPOINT_SOURCE", "").lower() == "true"
    AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT = os.getenv("AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT", "low")
    USE_VECTORS = os.getenv("USE_VECTORS", "").lower() != "false"

    # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
    RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None

    # Use the current user identity for keyless authentication to Azure services.
    # This assumes you use 'azd auth login' locally, and managed identity when deployed on Azure.
    # The managed identity is setup in the infra/ folder.
    azure_credential: AzureDeveloperCliCredential | ManagedIdentityCredential
    azure_ai_token_provider: Callable[[], Awaitable[str]]
    if RUNNING_ON_AZURE:
        current_app.logger.info("Setting up Azure credential using ManagedIdentityCredential")
        if AZURE_CLIENT_ID := os.getenv("AZURE_CLIENT_ID"):
            # ManagedIdentityCredential should use AZURE_CLIENT_ID if set in env, but its not working for some reason,
            # so we explicitly pass it in as the client ID here. This is necessary for user-assigned managed identities.
            current_app.logger.info(
                "Setting up Azure credential using ManagedIdentityCredential with client_id %s", AZURE_CLIENT_ID
            )
            azure_credential = ManagedIdentityCredential(client_id=AZURE_CLIENT_ID)
        else:
            current_app.logger.info("Setting up Azure credential using ManagedIdentityCredential")
            azure_credential = ManagedIdentityCredential()
    elif AZURE_TENANT_ID:
        current_app.logger.info(
            "Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID
        )
        azure_credential = AzureDeveloperCliCredential(tenant_id=AZURE_TENANT_ID, process_timeout=60)
    else:
        current_app.logger.info("Setting up Azure credential using AzureDeveloperCliCredential for home tenant")
        azure_credential = AzureDeveloperCliCredential(process_timeout=60)
    azure_ai_token_provider = get_bearer_token_provider(
        azure_credential, "https://cognitiveservices.azure.com/.default"
    )

    # Set the Azure credential in the app config for use in other parts of the app
    current_app.config[CONFIG_CREDENTIAL] = azure_credential

    # Set up clients for AI Search and Storage
    search_client = SearchClient(
        endpoint=AZURE_SEARCH_ENDPOINT,
        index_name=AZURE_SEARCH_INDEX,
        credential=azure_credential,
    )

    knowledgebase_client = KnowledgeBaseRetrievalClient(
        endpoint=AZURE_SEARCH_ENDPOINT, knowledge_base_name=AZURE_SEARCH_KNOWLEDGEBASE_NAME, credential=azure_credential
    )
    knowledgebase_client_with_web = None
    knowledgebase_client_with_sharepoint = None
    knowledgebase_client_with_web_and_sharepoint = None

    if AZURE_SEARCH_KNOWLEDGEBASE_NAME:
        if USE_WEB_SOURCE:
            knowledgebase_client_with_web = KnowledgeBaseRetrievalClient(
                endpoint=AZURE_SEARCH_ENDPOINT,
                knowledge_base_name=f"{AZURE_SEARCH_KNOWLEDGEBASE_NAME}-with-web",
                credential=azure_credential,
            )
        if USE_SHAREPOINT_SOURCE:
            knowledgebase_client_with_sharepoint = KnowledgeBaseRetrievalClient(
                endpoint=AZURE_SEARCH_ENDPOINT,
                knowledge_base_name=f"{AZURE_SEARCH_KNOWLEDGEBASE_NAME}-with-sp",
                credential=azure_credential,
            )
        if USE_WEB_SOURCE and USE_SHAREPOINT_SOURCE:
            knowledgebase_client_with_web_and_sharepoint = KnowledgeBaseRetrievalClient(
                endpoint=AZURE_SEARCH_ENDPOINT,
                knowledge_base_name=f"{AZURE_SEARCH_KNOWLEDGEBASE_NAME}-with-web-and-sp",
                credential=azure_credential,
            )

    # Set up the global blob storage manager (used for global content/images, but not user uploads)
    global_blob_manager = BlobManager(
        endpoint=f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net",
        credential=azure_credential,
        container=AZURE_STORAGE_CONTAINER,
        image_container=AZURE_IMAGESTORAGE_CONTAINER,
    )
    current_app.config[CONFIG_GLOBAL_BLOB_MANAGER] = global_blob_manager

    # Set up authentication helper
    search_index = None
    if AZURE_USE_AUTHENTICATION:
        current_app.logger.info("AZURE_USE_AUTHENTICATION is true, setting up search index client")
        search_index_client = SearchIndexClient(
            endpoint=AZURE_SEARCH_ENDPOINT,
            credential=azure_credential,
        )
        search_index = await search_index_client.get_index(AZURE_SEARCH_INDEX)
        await search_index_client.close()
    auth_helper = AuthenticationHelper(
        search_index=search_index,
        use_authentication=AZURE_USE_AUTHENTICATION,
        server_app_id=AZURE_SERVER_APP_ID,
        server_app_secret=AZURE_SERVER_APP_SECRET,
        client_app_id=AZURE_CLIENT_APP_ID,
        tenant_id=AZURE_AUTH_TENANT_ID,
        enforce_access_control=AZURE_ENFORCE_ACCESS_CONTROL,
        enable_unauthenticated_access=AZURE_ENABLE_UNAUTHENTICATED_ACCESS,
    )

    if USE_SPEECH_OUTPUT_AZURE:
        current_app.logger.info("USE_SPEECH_OUTPUT_AZURE is true, setting up Azure speech service")
        if not AZURE_SPEECH_SERVICE_ID or AZURE_SPEECH_SERVICE_ID == "":
            raise ValueError("Azure speech resource not configured correctly, missing AZURE_SPEECH_SERVICE_ID")
        if not AZURE_SPEECH_SERVICE_LOCATION or AZURE_SPEECH_SERVICE_LOCATION == "":
            raise ValueError("Azure speech resource not configured correctly, missing AZURE_SPEECH_SERVICE_LOCATION")
        current_app.config[CONFIG_SPEECH_SERVICE_ID] = AZURE_SPEECH_SERVICE_ID
        current_app.config[CONFIG_SPEECH_SERVICE_LOCATION] = AZURE_SPEECH_SERVICE_LOCATION
        current_app.config[CONFIG_SPEECH_SERVICE_VOICE] = AZURE_SPEECH_SERVICE_VOICE
        # Wait until token is needed to fetch for the first time
        current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None

    openai_client, azure_openai_endpoint = setup_openai_client(
        openai_host=OPENAI_HOST,
        azure_credential=azure_credential,
        azure_openai_service=AZURE_OPENAI_SERVICE,
        azure_openai_custom_url=AZURE_OPENAI_CUSTOM_URL,
        azure_openai_api_key=AZURE_OPENAI_API_KEY_OVERRIDE,
        openai_api_key=OPENAI_API_KEY,
        openai_organization=OPENAI_ORGANIZATION,
    )

    user_blob_manager = None
    if USE_USER_UPLOAD:
        current_app.logger.info("USE_USER_UPLOAD is true, setting up user upload feature")
        if not AZURE_USERSTORAGE_ACCOUNT or not AZURE_USERSTORAGE_CONTAINER:
            raise ValueError(
                "AZURE_USERSTORAGE_ACCOUNT and AZURE_USERSTORAGE_CONTAINER must be set when USE_USER_UPLOAD is true"
            )
        if not AZURE_ENFORCE_ACCESS_CONTROL:
            raise ValueError("AZURE_ENFORCE_ACCESS_CONTROL must be true when USE_USER_UPLOAD is true")
        user_blob_manager = AdlsBlobManager(
            endpoint=f"https://{AZURE_USERSTORAGE_ACCOUNT}.dfs.core.windows.net",
            container=AZURE_USERSTORAGE_CONTAINER,
            credential=azure_credential,
        )
        current_app.config[CONFIG_USER_BLOB_MANAGER] = user_blob_manager

        # Set up ingester
        file_processors, figure_processor = setup_file_processors(
            azure_credential=azure_credential,
            document_intelligence_service=os.getenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE"),
            local_pdf_parser=os.getenv("USE_LOCAL_PDF_PARSER", "").lower() == "true",
            local_html_parser=os.getenv("USE_LOCAL_HTML_PARSER", "").lower() == "true",
            use_content_understanding=os.getenv("USE_CONTENT_UNDERSTANDING", "").lower() == "true",
            content_understanding_endpoint=os.getenv("AZURE_CONTENTUNDERSTANDING_ENDPOINT"),
            use_multimodal=USE_MULTIMODAL,
            openai_client=openai_client,
            openai_model=OPENAI_CHATGPT_MODEL,
            openai_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT if OPENAI_HOST == OpenAIHost.AZURE else None,
        )
        search_info = setup_search_info(
            search_service=AZURE_SEARCH_SERVICE,
            index_name=AZURE_SEARCH_INDEX,
            azure_credential=azure_credential,
            use_agentic_knowledgebase=USE_AGENTIC_KNOWLEDGEBASE,
            azure_openai_endpoint=azure_openai_endpoint,
            knowledgebase_name=AZURE_SEARCH_KNOWLEDGEBASE_NAME,
            azure_openai_knowledgebase_deployment=AZURE_OPENAI_KNOWLEDGEBASE_DEPLOYMENT,
            azure_openai_knowledgebase_model=AZURE_OPENAI_KNOWLEDGEBASE_MODEL,
        )

        text_embeddings_service = None
        if USE_VECTORS:
            text_embeddings_service = setup_embeddings_service(
                open_ai_client=openai_client,
                openai_host=OPENAI_HOST,
                emb_model_name=OPENAI_EMB_MODEL,
                emb_model_dimensions=OPENAI_EMB_DIMENSIONS,
                azure_openai_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
                azure_openai_endpoint=azure_openai_endpoint,
            )

        image_embeddings_service = setup_image_embeddings_service(
            azure_credential=azure_credential,
            vision_endpoint=AZURE_VISION_ENDPOINT,
            use_multimodal=USE_MULTIMODAL,
        )
        ingester = UploadUserFileStrategy(
            search_info=search_info,
            file_processors=file_processors,
            embeddings=text_embeddings_service,
            image_embeddings=image_embeddings_service,
            search_field_name_embedding=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
            blob_manager=user_blob_manager,
            figure_processor=figure_processor,
        )
        current_app.config[CONFIG_INGESTER] = ingester

    image_embeddings_client = None
    if USE_MULTIMODAL:
        image_embeddings_client = ImageEmbeddings(AZURE_VISION_ENDPOINT, azure_ai_token_provider)

    current_app.config[CONFIG_OPENAI_CLIENT] = openai_client
    current_app.config[CONFIG_SEARCH_CLIENT] = search_client
    current_app.config[CONFIG_KNOWLEDGEBASE_CLIENT] = knowledgebase_client
    current_app.config[CONFIG_KNOWLEDGEBASE_CLIENT_WITH_WEB] = knowledgebase_client_with_web
    current_app.config[CONFIG_KNOWLEDGEBASE_CLIENT_WITH_SHAREPOINT] = knowledgebase_client_with_sharepoint
    current_app.config[CONFIG_KNOWLEDGEBASE_CLIENT_WITH_WEB_AND_SHAREPOINT] = (
        knowledgebase_client_with_web_and_sharepoint
    )
    current_app.config[CONFIG_AUTH_CLIENT] = auth_helper

    current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
    current_app.config[CONFIG_QUERY_REWRITING_ENABLED] = (
        AZURE_SEARCH_QUERY_REWRITING == "true" and AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
    )
    current_app.config[CONFIG_DEFAULT_REASONING_EFFORT] = OPENAI_REASONING_EFFORT
    current_app.config[CONFIG_DEFAULT_RETRIEVAL_REASONING_EFFORT] = AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT
    current_app.config[CONFIG_REASONING_EFFORT_ENABLED] = OPENAI_CHATGPT_MODEL in Approach.GPT_REASONING_MODELS
    current_app.config[CONFIG_STREAMING_ENABLED] = (
        OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS
        or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming
    )
    current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = bool(USE_VECTORS)
    current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
    current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER
    current_app.config[CONFIG_SPEECH_INPUT_ENABLED] = USE_SPEECH_INPUT_BROWSER
    current_app.config[CONFIG_SPEECH_OUTPUT_BROWSER_ENABLED] = USE_SPEECH_OUTPUT_BROWSER
    current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED] = USE_SPEECH_OUTPUT_AZURE
    current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
    current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS
    current_app.config[CONFIG_AGENTIC_KNOWLEDGEBASE_ENABLED] = USE_AGENTIC_KNOWLEDGEBASE
    current_app.config[CONFIG_MULTIMODAL_ENABLED] = USE_MULTIMODAL
    current_app.config[CONFIG_RAG_SEARCH_TEXT_EMBEDDINGS] = RAG_SEARCH_TEXT_EMBEDDINGS
    current_app.config[CONFIG_RAG_SEARCH_IMAGE_EMBEDDINGS] = RAG_SEARCH_IMAGE_EMBEDDINGS
    current_app.config[CONFIG_RAG_SEND_TEXT_SOURCES] = RAG_SEND_TEXT_SOURCES
    current_app.config[CONFIG_RAG_SEND_IMAGE_SOURCES] = RAG_SEND_IMAGE_SOURCES
    current_app.config[CONFIG_WEB_SOURCE_ENABLED] = USE_WEB_SOURCE
    if AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT == "minimal" and current_app.config[CONFIG_WEB_SOURCE_ENABLED]:
        raise ValueError("Web source cannot be used with minimal retrieval reasoning effort")
    current_app.config[CONFIG_SHAREPOINT_SOURCE_ENABLED] = USE_SHAREPOINT_SOURCE

    prompt_manager = PromptManager()

    # ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
    current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
        search_client=search_client,
        search_index_name=AZURE_SEARCH_INDEX,
        knowledgebase_model=AZURE_OPENAI_KNOWLEDGEBASE_MODEL,
        knowledgebase_deployment=AZURE_OPENAI_KNOWLEDGEBASE_DEPLOYMENT,
        knowledgebase_client=knowledgebase_client,
        knowledgebase_client_with_web=knowledgebase_client_with_web,
        knowledgebase_client_with_sharepoint=knowledgebase_client_with_sharepoint,
        knowledgebase_client_with_web_and_sharepoint=knowledgebase_client_with_web_and_sharepoint,
        openai_client=openai_client,
        chatgpt_model=OPENAI_CHATGPT_MODEL,
        chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
        embedding_model=OPENAI_EMB_MODEL,
        embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
        embedding_dimensions=OPENAI_EMB_DIMENSIONS,
        embedding_field=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
        sourcepage_field=KB_FIELDS_SOURCEPAGE,
        content_field=KB_FIELDS_CONTENT,
        query_language=AZURE_SEARCH_QUERY_LANGUAGE,
        query_speller=AZURE_SEARCH_QUERY_SPELLER,
        prompt_manager=prompt_manager,
        reasoning_effort=OPENAI_REASONING_EFFORT,
        multimodal_enabled=USE_MULTIMODAL,
        image_embeddings_client=image_embeddings_client,
        global_blob_manager=global_blob_manager,
        user_blob_manager=user_blob_manager,
        use_web_source=current_app.config[CONFIG_WEB_SOURCE_ENABLED],
        use_sharepoint_source=current_app.config[CONFIG_SHAREPOINT_SOURCE_ENABLED],
        retrieval_reasoning_effort=AGENTIC_KNOWLEDGEBASE_REASONING_EFFORT,
    )


@bp.after_app_serving
async def close_clients():
    """Close the clients stored in the app config once the app has stopped serving."""
    config = current_app.config
    await config[CONFIG_SEARCH_CLIENT].close()
    await config[CONFIG_GLOBAL_BLOB_MANAGER].close_clients()
    # The user blob manager entry may be absent, so look it up with .get()
    user_manager = config.get(CONFIG_USER_BLOB_MANAGER)
    if user_manager:
        await user_manager.close_clients()
    await config[CONFIG_CREDENTIAL].close()


def create_app():
    """Build and configure the Quart application.

    Registers the blueprints, optionally enables Azure Monitor / OpenTelemetry
    instrumentation, sets logger levels and, when requested, enables CORS.

    Environment variables read:
        APPLICATIONINSIGHTS_CONNECTION_STRING: when set, telemetry is enabled.
        APP_LOG_LEVEL: level for the app logger and the "scripts" logger (default "INFO").
        ALLOWED_ORIGIN: semicolon-separated origins allowed for CORS (GET and POST).

    Returns:
        The configured Quart app.
    """
    app = Quart(__name__)
    app.register_blueprint(bp)
    app.register_blueprint(chat_history_cosmosdb_bp)

    if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"):
        app.logger.info("APPLICATIONINSIGHTS_CONNECTION_STRING is set, enabling Azure Monitor")
        configure_azure_monitor(
            instrumentation_options={
                "django": {"enabled": False},
                "psycopg2": {"enabled": False},
                "fastapi": {"enabled": False},
            }
        )
        # This tracks HTTP requests made by aiohttp:
        AioHttpClientInstrumentor().instrument()
        # This tracks HTTP requests made by httpx:
        HTTPXClientInstrumentor().instrument()
        # This tracks OpenAI SDK requests:
        OpenAIInstrumentor().instrument()
        # This middleware tracks app route requests:
        app.asgi_app = OpenTelemetryMiddleware(app.asgi_app)  # type: ignore[assignment]

    # Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels
    # Set root level to WARNING to avoid seeing overly verbose logs from SDKs
    logging.basicConfig(level=logging.WARNING)
    # Set our own logger levels to INFO by default
    app_level = os.getenv("APP_LOG_LEVEL", "INFO")
    app.logger.setLevel(app_level)
    logging.getLogger("scripts").setLevel(app_level)

    if allowed_origin := os.getenv("ALLOWED_ORIGIN"):
        # str.split never returns an empty list, so drop blank entries explicitly
        # (e.g. from a trailing ";") instead of relying on a length check.
        allowed_origins = [origin.strip() for origin in allowed_origin.split(";") if origin.strip()]
        if allowed_origins:
            app.logger.info("CORS enabled for %s", allowed_origins)
            cors(app, allow_origin=allowed_origins, allow_methods=["GET", "POST"])

    return app


================================================
FILE: app/backend/approaches/__init__.py
================================================


================================================
FILE: app/backend/approaches/approach.py
================================================
import base64
import json
import re
from abc import ABC
from collections.abc import AsyncGenerator, Awaitable
from dataclasses import asdict, dataclass, field
from typing import Any, Optional, TypedDict, cast

from azure.search.documents.aio import SearchClient
from azure.search.documents.knowledgebases.aio import KnowledgeBaseRetrievalClient
from azure.search.documents.knowledgebases.models import (
    KnowledgeBaseMessage,
    KnowledgeBaseMessageTextContent,
    KnowledgeBaseRemoteSharePointActivityRecord,
    KnowledgeBaseRemoteSharePointReference,
    KnowledgeBaseRetrievalRequest,
    KnowledgeBaseRetrievalResponse,
    KnowledgeBaseSearchIndexActivityRecord,
    KnowledgeBaseSearchIndexReference,
    KnowledgeBaseWebActivityRecord,
    KnowledgeBaseWebReference,
    KnowledgeRetrievalLowReasoningEffort,
    KnowledgeRetrievalMediumReasoningEffort,
    KnowledgeRetrievalMinimalReasoningEffort,
    KnowledgeRetrievalSemanticIntent,
    KnowledgeSourceParams,
    RemoteSharePointKnowledgeSourceParams,
    SearchIndexKnowledgeSourceParams,
    WebKnowledgeSourceParams,
)
from azure.search.documents.models import (
    QueryCaptionResult,
    QueryType,
    VectorizedQuery,
    VectorQuery,
)
from openai import AsyncOpenAI, AsyncStream
from openai.types import CompletionUsage
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageFunctionToolCall,
    ChatCompletionMessageParam,
    ChatCompletionReasoningEffort,
    ChatCompletionToolParam,
)

from approaches.promptmanager import PromptManager
from prepdocslib.blobmanager import AdlsBlobManager, BlobManager
from prepdocslib.embeddings import ImageEmbeddings


@dataclass
class ActivityDetail:
    """Details of one retrieval activity; result objects reference it through their ``activity`` field."""

    # NOTE(review): presumably the id of the activity record as reported by the retrieval response — confirm
    id: int
    # NOTE(review): presumably a display/ordinal number for the activity — confirm against the code that builds it
    number: int
    # NOTE(review): presumably the kind of activity record — confirm
    type: str
    # NOTE(review): presumably the knowledge source the activity ran against — confirm
    source: str
    # NOTE(review): presumably the search query issued for this activity — confirm
    query: str


@dataclass
class Document:
    """A search result document with optional scores, captions, images and activity details."""

    id: Optional[str] = None
    ref_id: Optional[str] = None  # Reference id from agentic retrieval (if applicable)
    content: Optional[str] = None
    category: Optional[str] = None
    sourcepage: Optional[str] = None
    sourcefile: Optional[str] = None
    oids: Optional[list[str]] = None
    groups: Optional[list[str]] = None
    captions: Optional[list[QueryCaptionResult]] = None
    score: Optional[float] = None
    reranker_score: Optional[float] = None
    activity: Optional[ActivityDetail] = None
    images: Optional[list[dict[str, Any]]] = None

    def serialize_for_results(self) -> dict[str, Any]:
        """Return this document as a plain dict tagged with ``"type": "searchIndex"``.

        Captions are flattened to dicts (an empty list when there are none) and
        the activity, when present, is converted with ``asdict``.
        """
        caption_payloads = []
        for caption in self.captions or []:
            caption_payloads.append(
                {
                    "additional_properties": caption.additional_properties,
                    "text": caption.text,
                    "highlights": caption.highlights,
                }
            )

        activity_payload = None
        if self.activity:
            activity_payload = asdict(self.activity)

        return {
            "type": "searchIndex",
            "id": self.id,
            "content": self.content,
            "category": self.category,
            "sourcepage": self.sourcepage,
            "sourcefile": self.sourcefile,
            "oids": self.oids,
            "groups": self.groups,
            "captions": caption_payloads,
            "score": self.score,
            "reranker_score": self.reranker_score,
            "activity": activity_payload,
            "images": self.images,
        }


@dataclass
class WebResult:
    """A web reference (id, title, url) with optional activity details."""

    id: Optional[str] = None
    title: Optional[str] = None
    url: Optional[str] = None
    activity: Optional[ActivityDetail] = None

    def serialize_for_results(self) -> dict[str, Any]:
        """Return this result as a plain dict tagged with ``"type": "web"``; ``ref_id`` is ``str(id)``."""
        activity_payload = None
        if self.activity:
            activity_payload = asdict(self.activity)

        payload: dict[str, Any] = {
            "type": "web",
            "id": self.id,
            "ref_id": str(self.id),
            "title": self.title,
            "url": self.url,
            "activity": activity_payload,
        }
        return payload


@dataclass
class SharePointResult:
    """A remote SharePoint reference with optional content, reranker score and activity details."""

    id: Optional[str] = None
    web_url: Optional[str] = None
    content: Optional[str] = None
    title: Optional[str] = None
    reranker_score: Optional[float] = None
    activity: Optional[ActivityDetail] = None

    def serialize_for_results(self) -> dict[str, Any]:
        """Return this result as a plain dict tagged with ``"type": "remoteSharePoint"``; ``ref_id`` is ``str(id)``."""
        activity_payload = None
        if self.activity:
            activity_payload = asdict(self.activity)

        payload: dict[str, Any] = {
            "type": "remoteSharePoint",
            "id": self.id,
            "ref_id": str(self.id),
            "web_url": self.web_url,
            "content": self.content,
            "title": self.title,
            "reranker_score": self.reranker_score,
            "activity": activity_payload,
        }
        return payload


@dataclass
class RewriteQueryResult:
    """Outcome of a query-rewrite chat completion, as returned by ``Approach.rewrite_query``."""

    # The query extracted from the completion (or the original user query as a fallback)
    query: str
    # The prompt messages that were sent to the model
    messages: list[ChatCompletionMessageParam]
    # The raw chat completion returned by the model
    completion: ChatCompletion
    # The reasoning effort that was passed to the completion call
    reasoning_effort: ChatCompletionReasoningEffort


@dataclass
class ThoughtStep:
    """A titled step in a list of thoughts, with an optional description and optional properties."""

    title: str
    description: Optional[Any]
    props: Optional[dict[str, Any]] = None

    def update_token_usage(self, usage: CompletionUsage) -> None:
        """Store ``usage`` under ``props["token_usage"]``; a missing or empty ``props`` is left untouched."""
        if not self.props:
            return
        self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)


@dataclass
class AgenticRetrievalResults:
    """Results from agentic retrieval including activities, documents, web results, SharePoint results, and optional answer."""

    # The raw response returned by the knowledge base retrieval client
    response: KnowledgeBaseRetrievalResponse
    documents: list[Document]
    web_results: list[WebResult]
    sharepoint_results: list[SharePointResult] = field(default_factory=list)
    answer: Optional[str] = None  # Synthesized answer when web knowledge source is used
    # Set when the user query was rewritten before retrieval; None otherwise
    rewrite_result: Optional[RewriteQueryResult] = None
    # Activity details keyed by activity id
    activity_details_by_id: Optional[dict[int, ActivityDetail]] = None
    thoughts: list[ThoughtStep] = field(default_factory=list)


@dataclass
class DataPoints:
    """Optional collections of supporting data (text, images, citations and related metadata); every field defaults to None."""

    text: Optional[list[str]] = None
    images: Optional[list] = None
    citations: Optional[list[str]] = None
    # NOTE(review): presumably metadata for non-index results (web / SharePoint) — confirm against the code that fills it
    external_results_metadata: Optional[list[dict[str, Any]]] = None
    # NOTE(review): presumably maps a citation to serialized activity details — confirm
    citation_activity_details: Optional[dict[str, dict[str, Any]]] = None


@dataclass
class ExtraInfo:
    """Bundle of data points and thought steps, plus optional follow-up questions and answer."""

    data_points: DataPoints
    # Defaults to a fresh empty list per instance
    thoughts: list[ThoughtStep] = field(default_factory=list)
    followup_questions: Optional[list[Any]] = None
    answer: Optional[str] = None  # Only when web knowledge source is used


@dataclass
class TokenUsageProps:
    """Token counts copied from a completion usage record."""

    prompt_tokens: int
    completion_tokens: int
    reasoning_tokens: Optional[int]
    total_tokens: int

    @classmethod
    def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
        """Build an instance from ``usage``; ``reasoning_tokens`` is None when no completion token details exist."""
        details = usage.completion_tokens_details
        reasoning_tokens = details.reasoning_tokens if details else None
        return cls(
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            reasoning_tokens=reasoning_tokens,
            total_tokens=usage.total_tokens,
        )


# GPT reasoning models don't support the same set of parameters as other models
# https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning
@dataclass
class GPTReasoningModelSupport:
    """Feature flags describing what a given GPT reasoning model supports."""

    # Whether the model supports streaming responses
    streaming: bool
    # NOTE(review): presumably whether the model accepts the "minimal" reasoning effort — confirm
    minimal_effort: bool


class Approach(ABC):
    """Base class holding the clients and settings shared by approaches, plus common search and query-rewrite helpers."""

    # Supported GPT reasoning models, keyed by model name, with the features each one supports
    GPT_REASONING_MODELS = {
        "o1": GPTReasoningModelSupport(streaming=False, minimal_effort=False),
        "o3": GPTReasoningModelSupport(streaming=True, minimal_effort=False),
        "o3-mini": GPTReasoningModelSupport(streaming=True, minimal_effort=False),
        "o4-mini": GPTReasoningModelSupport(streaming=True, minimal_effort=False),
        "gpt-5": GPTReasoningModelSupport(streaming=True, minimal_effort=True),
        "gpt-5-nano": GPTReasoningModelSupport(streaming=True, minimal_effort=True),
        "gpt-5-mini": GPTReasoningModelSupport(streaming=True, minimal_effort=True),
    }
    # Default response token limits; the limit for GPT reasoning models is higher
    RESPONSE_DEFAULT_TOKEN_LIMIT = 1024
    RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT = 8192
    # Passed as no_response_token when rewriting queries: a rewritten query equal to this value is ignored
    QUERY_REWRITE_NO_RESPONSE = "0"

    def __init__(
        self,
        search_client: SearchClient,
        openai_client: AsyncOpenAI,
        knowledgebase_model: Optional[str],
        knowledgebase_deployment: Optional[str],
        query_language: Optional[str],
        query_speller: Optional[str],
        embedding_deployment: Optional[str],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
        embedding_model: str,
        embedding_dimensions: int,
        embedding_field: str,
        openai_host: str,
        chatgpt_model: str,
        chatgpt_deployment: Optional[str],  # Not needed for non-Azure OpenAI
        prompt_manager: PromptManager,
        reasoning_effort: Optional[str] = None,
        multimodal_enabled: bool = False,
        image_embeddings_client: Optional[ImageEmbeddings] = None,
        global_blob_manager: Optional[BlobManager] = None,
        user_blob_manager: Optional[AdlsBlobManager] = None,
    ):
        """Store the shared clients and settings on the instance.

        Besides copying every argument to an attribute of the same name, this
        loads the query-rewrite tool definitions from
        ``chat_query_rewrite_tools.json`` through the prompt manager and turns
        ``include_token_usage`` on.
        """
        # Service clients and storage managers
        self.search_client = search_client
        self.openai_client = openai_client
        self.image_embeddings_client = image_embeddings_client
        self.global_blob_manager = global_blob_manager
        self.user_blob_manager = user_blob_manager

        # Search query options
        self.query_language = query_language
        self.query_speller = query_speller

        # Knowledge base model settings
        self.knowledgebase_model = knowledgebase_model
        self.knowledgebase_deployment = knowledgebase_deployment

        # Embedding settings
        self.embedding_deployment = embedding_deployment
        self.embedding_model = embedding_model
        self.embedding_dimensions = embedding_dimensions
        self.embedding_field = embedding_field

        # Chat model settings
        self.openai_host = openai_host
        self.chatgpt_model = chatgpt_model
        self.chatgpt_deployment = chatgpt_deployment
        self.reasoning_effort = reasoning_effort
        self.include_token_usage = True
        self.multimodal_enabled = multimodal_enabled

        # Prompts and the tool definitions used for query rewriting
        self.prompt_manager = prompt_manager
        self.query_rewrite_tools = prompt_manager.load_tools("chat_query_rewrite_tools.json")

    def build_filter(self, overrides: dict[str, Any]) -> Optional[str]:
        """Build a category filter expression from ``overrides``.

        ``include_category`` yields a ``category eq '...'`` clause and
        ``exclude_category`` a ``category ne '...'`` clause; single quotes in
        the values are doubled. Clauses are joined with ``" and "``.

        Returns:
            The filter string, or None when neither override is set.
        """
        clauses = []
        for operator, key in (("eq", "include_category"), ("ne", "exclude_category")):
            value = overrides.get(key)
            if value:
                escaped = value.replace("'", "''")
                clauses.append(f"category {operator} '{escaped}'")
        if not clauses:
            return None
        return " and ".join(clauses)

    async def search(
        self,
        top: int,
        query_text: Optional[str],
        filter: Optional[str],
        vectors: list[VectorQuery],
        use_text_search: bool,
        use_vector_search: bool,
        use_semantic_ranker: bool,
        use_semantic_captions: bool,
        minimum_search_score: Optional[float] = None,
        minimum_reranker_score: Optional[float] = None,
        use_query_rewriting: Optional[bool] = None,
        access_token: Optional[str] = None,
    ) -> list[Document]:
        """Query the search index and return the documents that meet the score thresholds.

        Args:
            top: Maximum number of results requested from the search client.
            query_text: The query text; used as search text only when
                ``use_text_search`` is true, and as the semantic query when the
                semantic ranker is used.
            filter: Optional filter expression passed through to the search client.
            vectors: Vector queries; sent only when ``use_vector_search`` is true.
            use_text_search: Whether to send ``query_text`` as the search text.
            use_vector_search: Whether to send ``vectors``.
            use_semantic_ranker: Whether to issue a semantic query.
            use_semantic_captions: Request extractive captions (semantic queries only).
            minimum_search_score: Documents scoring below this are dropped (None means 0).
            minimum_reranker_score: Documents with a reranker score below this are
                dropped (None means 0).
            use_query_rewriting: Request generative query rewrites (semantic queries only).
            access_token: Forwarded as ``x_ms_query_source_authorization``.

        Returns:
            The qualifying documents, in the order returned by the search client;
            an empty list when there are no results.
        """
        search_text = query_text if use_text_search else ""
        search_vectors = vectors if use_vector_search else []
        if use_semantic_ranker:
            results = await self.search_client.search(
                search_text=search_text,
                filter=filter,
                top=top,
                query_caption="extractive|highlight-false" if use_semantic_captions else None,
                query_rewrites="generative" if use_query_rewriting else None,
                vector_queries=search_vectors,
                query_type=QueryType.SEMANTIC,
                query_language=self.query_language,
                query_speller=self.query_speller,
                semantic_configuration_name="default",
                semantic_query=query_text,
                x_ms_query_source_authorization=access_token,
            )
        else:
            results = await self.search_client.search(
                search_text=search_text,
                filter=filter,
                top=top,
                vector_queries=search_vectors,
                x_ms_query_source_authorization=access_token,
            )

        documents: list[Document] = []
        async for page in results.by_page():
            async for document in page:
                documents.append(
                    Document(
                        id=document.get("id"),
                        content=document.get("content"),
                        category=document.get("category"),
                        sourcepage=document.get("sourcepage"),
                        sourcefile=document.get("sourcefile"),
                        oids=document.get("oids"),
                        groups=document.get("groups"),
                        captions=cast(list[QueryCaptionResult], document.get("@search.captions")),
                        score=document.get("@search.score"),
                        reranker_score=document.get("@search.reranker_score"),
                        images=document.get("images"),
                    )
                )

        # Filter once, after every page has been collected. Doing this inside the
        # page loop re-filtered the whole list per page and left the result
        # unbound when no page was yielded.
        score_threshold = minimum_search_score or 0
        reranker_threshold = minimum_reranker_score or 0
        return [
            doc
            for doc in documents
            if (doc.score or 0) >= score_threshold and (doc.reranker_score or 0) >= reranker_threshold
        ]

    def extract_rewritten_query(
        self,
        chat_completion: ChatCompletion,
        user_query: str,
        no_response_token: Optional[str] = None,
    ) -> str:
        """Pull the rewritten search query out of a chat completion.

        The first choice's message is inspected. Function tool calls are
        checked first, in order, for a ``search_query`` argument; the message
        content (stripped) is the next candidate. A candidate is rejected when
        it is empty or equal to ``no_response_token``.

        Args:
            chat_completion: The completion returned for the rewrite prompt.
            user_query: The original query, returned when nothing usable is found.
            no_response_token: Optional sentinel value meaning "no query".

        Returns:
            The rewritten query, or ``user_query`` as a fallback.
        """
        response_message = chat_completion.choices[0].message

        if response_message.tool_calls:
            for tool_call in response_message.tool_calls:
                if tool_call.type != "function":
                    continue
                arguments_payload = cast(ChatCompletionMessageFunctionToolCall, tool_call).function.arguments or "{}"
                try:
                    parsed_arguments = json.loads(arguments_payload)
                except json.JSONDecodeError:
                    continue
                if not isinstance(parsed_arguments, dict):
                    # Valid JSON that is not an object (e.g. "null" or a list) has no
                    # search_query; skip it like malformed JSON instead of failing on .get()
                    continue
                search_query = parsed_arguments.get("search_query")
                if search_query and (no_response_token is None or search_query != no_response_token):
                    return search_query

        if response_message.content:
            candidate = response_message.content.strip()
            if candidate and (no_response_token is None or candidate != no_response_token):
                return candidate

        return user_query

    async def rewrite_query(
        self,
        *,
        prompt_template: str,
        prompt_variables: dict[str, Any],
        overrides: dict[str, Any],
        chatgpt_model: str,
        chatgpt_deployment: Optional[str],
        user_query: str,
        response_token_limit: int,
        tools: Optional[list[ChatCompletionToolParam]] = None,
        temperature: float = 0.0,
        no_response_token: Optional[str] = None,
    ) -> RewriteQueryResult:
        """Ask the chat model to rewrite ``user_query`` into a search query.

        Args:
            prompt_template: Name of the system prompt template to render.
            prompt_variables: Variables passed to the prompt template.
            overrides: Request overrides forwarded to the chat completion call.
            chatgpt_model: Model used for the rewrite completion.
            chatgpt_deployment: Deployment used for the rewrite completion, if any.
            user_query: The original query; used as the fallback result.
            response_token_limit: Token limit for the completion.
            tools: Optional tool definitions offered to the model.
            temperature: Sampling temperature for the completion.
            no_response_token: Optional sentinel the model may return to mean "no query".

        Returns:
            The rewritten query together with the prompt messages, the raw
            completion and the reasoning effort that was used.
        """
        query_messages = [self.prompt_manager.build_system_prompt(prompt_template, prompt_variables)]
        # Derive the effort from the model that is actually called below, not from
        # self.chatgpt_model, so the two cannot disagree when a caller passes another model.
        rewrite_reasoning_effort = self.get_lowest_reasoning_effort(chatgpt_model)

        chat_completion = cast(
            ChatCompletion,
            await self.create_chat_completion(
                chatgpt_deployment,
                chatgpt_model,
                messages=query_messages,
                overrides=overrides,
                response_token_limit=response_token_limit,
                temperature=temperature,
                tools=tools,
                reasoning_effort=rewrite_reasoning_effort,
            ),
        )

        rewritten_query = self.extract_rewritten_query(
            chat_completion,
            user_query,
            no_response_token=no_response_token,
        )

        return RewriteQueryResult(
            query=rewritten_query,
            messages=query_messages,
            completion=chat_completion,
            reasoning_effort=rewrite_reasoning_effort,
        )

    async def run_agentic_retrieval(
        self,
        messages: list[ChatCompletionMessageParam],
        knowledgebase_client: KnowledgeBaseRetrievalClient,
        search_index_name: str,
        filter_add_on: Optional[str] = None,
        minimum_reranker_score: Optional[float] = None,
        access_token: Optional[str] = None,
        use_web_source: bool = False,
        use_sharepoint_source: bool = False,
        retrieval_reasoning_effort: Optional[str] = None,
        should_rewrite_query: bool = True,
    ) -> AgenticRetrievalResults:
        # STEP 1: Invoke agentic retrieval
        thoughts = []

        knowledge_source_params = [
            SearchIndexKnowledgeSourceParams(
                knowledge_source_name=search_index_name,
                filter_add_on=filter_add_on,
                include_references=True,
                include_reference_source_data=True,
                always_query_source=False,
                reranker_threshold=minimum_reranker_score,
            )
        ]
        # Build list as KnowledgeSourceParams for type variance
        knowledge_source_params_list: list[KnowledgeSourceParams] = cast(
            list[KnowledgeSourceParams], knowledge_source_params
        )

        if use_web_source:
            knowledge_source_params_list.append(
                WebKnowledgeSourceParams(
                    knowledge_source_name="web",
                    include_references=True,
                    include_reference_source_data=True,
                    always_query_source=False,
                )
            )

        if use_sharepoint_source:
            knowledge_source_params_list.append(
                RemoteSharePointKnowledgeSourceParams(
                    knowledge_source_name="sharepoint",
                    include_references=True,
                    include_reference_source_data=True,
                    always_query_source=False,
                )
            )

        agentic_retrieval_input: dict[str, Any] = {}
        rewrite_result = None
        if retrieval_reasoning_effort == "minimal" and should_rewrite_query:
            original_user_query = messages[-1]["content"]
            if not isinstance(original_user_query, str):
                raise ValueError("The most recent message content must be a string.")

            rewrite_result = await self.rewrite_query(
                prompt_template="query_rewrite.system.jinja2",
                prompt_variables={"user_query": original_user_query, "past_messages": messages[:-1]},
                overrides={},
                chatgpt_model=self.chatgpt_model,
                chatgpt_deployment=self.chatgpt_deployment,
                user_query=original_user_query,
                response_token_limit=self.get_response_token_limit(
                    self.chatgpt_model, 100
                ),  # Setting too low risks malformed JSON, setting too high may affect performance
                tools=self.query_rewrite_tools,
                temperature=0.0,  # Minimize creativity for search query generation
                no_response_token=self.QUERY_REWRITE_NO_RESPONSE,
            )
            thoughts.append(
                self.format_thought_step_for_chatcompletion(
                    title="Prompt to generate search query",
                    messages=rewrite_result.messages,
                    overrides={},
                    model=self.chatgpt_model,
                    deployment=self.chatgpt_deployment,
                    usage=rewrite_result.completion.usage,
                    reasoning_effort=rewrite_result.reasoning_effort,
                )
            )
            agentic_retrieval_input["intents"] = [KnowledgeRetrievalSemanticIntent(search=rewrite_result.query)]
        elif retrieval_reasoning_effort == "minimal":
            last_content = messages[-1]["content"]
            if not isinstance(last_content, str):
                raise ValueError("The most recent message content must be a string.")
            agentic_retrieval_input["intents"] = [KnowledgeRetrievalSemanticIntent(search=last_content)]
        else:
            kb_messages: list[KnowledgeBaseMessage] = [
                KnowledgeBaseMessage(
                    role=str(msg["role"]), content=[KnowledgeBaseMessageTextContent(text=str(msg["content"]))]
                )
                for msg in messages
                if msg["role"] != "system"
            ]
            agentic_retrieval_input["messages"] = kb_messages
        # When we're not using a web source, set output mode to extractiveData to avoid synthesized answer
        if not use_web_source:
            agentic_retrieval_input["output_mode"] = "extractiveData"

        retrieval_effort: Optional[
            KnowledgeRetrievalMinimalReasoningEffort
            | KnowledgeRetrievalLowReasoningEffort
            | KnowledgeRetrievalMediumReasoningEffort
        ] = None
        if retrieval_reasoning_effort == "minimal":
            retrieval_effort = KnowledgeRetrievalMinimalReasoningEffort()
        elif retrieval_reasoning_effort == "low":
            retrieval_effort = KnowledgeRetrievalLowReasoningEffort()
        elif retrieval_reasoning_effort == "medium":
            retrieval_effort = KnowledgeRetrievalMediumReasoningEffort()

        request_kwargs: dict[str, Any] = {
            "knowledge_source_params": knowledge_source_params_list,
            "include_activity": True,
            "retrieval_reasoning_effort": retrieval_effort,
        }
        request_kwargs.update(agentic_retrieval_input)

        response = await knowledgebase_client.retrieve(
            retrieval_request=KnowledgeBaseRetrievalRequest(**request_kwargs),
            x_ms_query_source_authorization=access_token,
        )

        # Map activity id -> agent's internal search query and citation
        activities = response.activity or []
        activity_details_by_id: dict[int, ActivityDetail] = {}

        for index, activity in enumerate(activities):
            search_query = None
            if isinstance(activity, KnowledgeBaseSearchIndexActivityRecord):
                if activity.search_index_arguments:
                    search_query = activity.search_index_arguments.search
            elif isinstance(activity, KnowledgeBaseWebActivityRecord):
                if activity.web_arguments:
                    search_query = activity.web_arguments.search
            elif isinstance(activity, KnowledgeBaseRemoteSharePointActivityRecord):
                if activity.remote_share_point_arguments:
                    search_query = activity.remote_share_point_arguments.search

            activity_details_by_id[activity.id] = ActivityDetail(
                id=activity.id,
                number=index + 1,
                type=activity.type or "",
                source=getattr(activity, "knowledge_source_name", "")
                or "",  # Not all activity types have knowledge_source_name
                query=search_query or "",
            )

        # Extract references
        references = response.references or []

        document_refs = [
            r for r in references if isinstance(r, KnowledgeBaseSearchIndexReference) or hasattr(r, "doc_key")
        ]
        document_results: list[Document] = []
        # Create documents from reference source data
        for ref in document_refs:
            if ref.source_data and ref.doc_key:
                # Note that ref.doc_key is the same as source_data["id"]
                document_results.append(
                    Document(
                        id=cast(str, ref.doc_key),
                        ref_id=ref.id,
                        content=ref.source_data.get("content"),
                        category=ref.source_data.get("category"),
                        sourcepage=ref.source_data.get("sourcepage"),
                        sourcefile=ref.source_data.get("sourcefile"),
                        oids=ref.source_data.get("oids"),
                        groups=ref.source_data.get("groups"),
                        reranker_score=getattr(ref, "reranker_score", None),
                        images=ref.source_data.get("images"),
                        activity=activity_details_by_id[ref.activity_source],
                    )
                )

        # We need to handle KnowledgeBaseWebReference separately if web knowledge source is used
        web_refs = [r for r in references if isinstance(r, KnowledgeBaseWebReference)]
        web_results: list[WebResult] = []
        for ref in web_refs:
            web_result = WebResult(
                id=ref.id, title=ref.title, url=ref.url, activity=activity_details_by_id[ref.activity_source]
            )
            web_results.append(web_result)

        # Handle KnowledgeBaseRemoteSharePointReference if SharePoint knowledge source is used
        sharepoint_refs = [r for r in references if isinstance(r, KnowledgeBaseRemoteSharePointReference)]
        sharepoint_results: list[SharePointResult] = []
        for ref in sharepoint_refs:
            # Extract content from all sourceData.extracts[].text and concatenate
            content = None
            if ref.source_data and "extracts" in ref.source_data and len(ref.source_data["extracts"]) > 0:
                extracts = [extract.get("text", "") for extract in ref.source_data["extracts"]]
                content = "\n\n".join(extracts) if extracts else None

            # Extract title from sourceData.resourceMetadata.title
            title = None
            if ref.source_data and "resourceMetadata" in ref.source_data:
                title = ref.source_data["resourceMetadata"].get("title")

            sharepoint_result = SharePointResult(
                id=ref.id,
                web_url=ref.web_url,
                content=content,
                title=title,
                reranker_score=getattr(ref, "reranker_score", None),
                activity=activity_details_by_id[ref.activity_source],
            )
            sharepoint_results.append(sharepoint_result)

        # Extract answer from response if web knowledge source provided one
        answer: Optional[str] = None
        if (
            use_web_source
            and response.response
            and len(response.response) > 0
            and len(response.response[0].content) > 0
        ):
            message_content = response.response[0].content[0]
            if isinstance(message_content, KnowledgeBaseMessageTextContent):
                raw_answer: Optional[str] = message_content.text
                # Replace all ref_id tokens (web -> URL, documents -> sourcepage, SharePoint -> web_url)
                if raw_answer:
                    answer = self.replace_all_ref_ids(raw_answer, document_results, web_results, sharepoint_results)

        thoughts.append(
            ThoughtStep(
                "Agentic retrieval response",
                [result.serialize_for_results() for result in document_results + web_results + sharepoint_results],
                {
                    "query_plan": (
                        [activity.as_dict() for activity in response.activity] if response.activity else None
                    ),
                    "model": self.knowledgebase_model,
                    "deployment": self.knowledgebase_deployment,
                    "reranker_threshold": minimum_reranker_score,
                    "filter": filter_add_on,
                },
            )
        )

        return AgenticRetrievalResults(
            response=response,
            documents=document_results,
            web_results=web_results,
            sharepoint_results=sharepoint_results,
            answer=answer,
            rewrite_result=rewrite_result,
            activity_details_by_id=activity_details_by_id,
            thoughts=thoughts,
        )

    def replace_all_ref_ids(
        self,
        answer: str,
        documents: list[Document],
        web_results: list[WebResult],
        sharepoint_results: Optional[list[SharePointResult]] = None,
    ) -> str:
        """Swap every ``[ref_id:<id>]`` token in ``answer`` for a bracketed citation.

        For each id the lookup order is: web result URL, then the last path segment of a
        SharePoint result's web_url, then a document's sourcepage. Tokens whose id has no
        non-empty replacement are returned exactly as they appeared.
        """
        # Web results: id -> URL
        url_by_id: dict = {}
        for web in web_results:
            if web.id and web.url:
                url_by_id[str(web.id)] = web.url

        # SharePoint results: id -> trailing segment of the web URL
        filename_by_id: dict = {}
        for item in sharepoint_results or []:
            if item.id and item.web_url:
                filename_by_id[str(item.id)] = item.web_url.split("/")[-1]

        # Documents: ref_id -> sourcepage
        sourcepage_by_id: dict = {}
        for document in documents:
            if document.ref_id and document.sourcepage:
                sourcepage_by_id[document.ref_id] = document.sourcepage

        # Tuple order encodes the lookup priority
        lookups = (url_by_id, filename_by_id, sourcepage_by_id)

        def _sub(match: re.Match) -> str:
            token_id = match.group(1)
            for lookup in lookups:
                replacement = lookup.get(token_id)
                if replacement:
                    return f"[{replacement}]"
            # Unknown id (or empty replacement): keep the original token
            return match.group(0)

        return re.sub(r"\[ref_id:([^\]]+)\]", _sub, answer)

    async def get_sources_content(
        self,
        results: list[Document],
        use_semantic_captions: bool,
        include_text_sources: bool,
        download_image_sources: bool,
        user_oid: Optional[str] = None,
        web_results: Optional[list[WebResult]] = None,
        sharepoint_results: Optional[list[SharePointResult]] = None,
    ) -> DataPoints:
        """Build text sources, image sources and citations from retrieval results.

        Args:
            results: Retrieved Document objects.
            use_semantic_captions: Use a document's captions instead of its content when captions exist.
            include_text_sources: Whether to emit "<citation>: <text>" entries for documents and
                SharePoint results.
            download_image_sources: Whether to download the images referenced by documents.
            user_oid: Forwarded to download_blob_as_base64 for each image download.
            web_results: Optional web results; they contribute citations and external metadata.
            sharepoint_results: Optional SharePoint results; they contribute citations, text
                sources and external metadata.

        Returns:
            DataPoints with text, images, citations, external_results_metadata and
            citation_activity_details (None when no activity details were collected).
        """

        def clean_source(s: str) -> str:
            # Flatten line breaks to spaces, then escape triple colons
            flattened = s.replace("\n", " ").replace("\r", " ")
            return flattened.replace(":::", "&#58;&#58;&#58;")

        citations = []
        text_sources = []
        image_sources = []
        seen_urls = set()
        external_results_metadata: list[dict[str, Any]] = []
        citation_activity_details: dict[str, dict[str, Any]] = {}

        # --- Documents ---
        for doc in results:
            citation = self.get_citation(doc.sourcepage)
            if citation not in citations:
                citations.append(citation)
                # Activity details are only recorded the first time a citation is seen
                if doc.activity:
                    citation_activity_details[citation] = asdict(doc.activity)

            if include_text_sources:
                source_text = (
                    " . ".join([cast(str, caption.text) for caption in doc.captions])
                    if use_semantic_captions and doc.captions
                    else (doc.content or "")
                )
                text_sources.append(f"{citation}: {clean_source(source_text)}")

            wants_images = download_image_sources and hasattr(doc, "images") and doc.images
            if not wants_images:
                continue
            for img in doc.images:
                image_url = img["url"]
                # Skip empty URLs and URLs that were already processed
                if not image_url or image_url in seen_urls:
                    continue
                seen_urls.add(image_url)
                encoded = await self.download_blob_as_base64(image_url, user_oid=user_oid)
                if encoded:
                    image_sources.append(encoded)
                # The image citation is added whether or not the download returned data
                citations.append(self.get_image_citation(doc.sourcepage or "", image_url))

        # --- Web results ---
        for web in web_results or []:
            citation = self.get_citation(web.url)
            if citation and citation not in citations:
                citations.append(citation)
                if web.activity:
                    citation_activity_details[citation] = asdict(web.activity)
            web_entry = {
                "id": web.id,
                "title": web.title,
                "url": web.url,
                "activity": asdict(web.activity) if web.activity else None,
            }
            external_results_metadata.append(web_entry)

        # --- SharePoint results ---
        for sp in sharepoint_results or []:
            # The citation is the last path segment of the web URL
            filename = sp.web_url.split("/")[-1] if sp.web_url else ""
            citation = self.get_citation(filename)
            if citation and citation not in citations:
                citations.append(citation)
                if sp.activity:
                    citation_activity_details[citation] = asdict(sp.activity)
            if include_text_sources and sp.content:
                text_sources.append(f"{citation}: {clean_source(sp.content)}")
            sharepoint_entry = {
                "id": sp.id,
                "title": sp.title or "",
                "url": sp.web_url or "",
                "snippet": clean_source(sp.content or ""),
                "activity": asdict(sp.activity) if sp.activity else None,
            }
            external_results_metadata.append(sharepoint_entry)

        return DataPoints(
            text=text_sources,
            images=image_sources,
            citations=citations,
            external_results_metadata=external_results_metadata,
            citation_activity_details=citation_activity_details or None,
        )

    def get_citation(self, sourcepage: Optional[str]):
        """Return ``sourcepage`` as the citation label, or "" when it is None or empty."""
        if not sourcepage:
            return ""
        return sourcepage

    def get_image_citation(self, sourcepage: Optional[str], image_url: str):
        """Return "<citation>(<image filename>)".

        The citation part comes from get_citation(sourcepage); the filename is the text
        after the last "/" in ``image_url`` (the whole string when there is no "/").
        """
        page_label = self.get_citation(sourcepage)
        filename = image_url.rsplit("/", 1)[-1]
        return "{}({})".format(page_label, filename)

    async def download_blob_as_base64(self, blob_url: str, user_oid: Optional[str] = None) -> Optional[str]:
        """
        Downloads a blob from either Azure Blob Storage or Azure Data Lake Storage and returns it as a base64 encoded string.

        Args:
            blob_url: The URL or path to the blob to download. Values starting with "http" are split into
                container and blob path; anything else is used as the blob path as-is.
            user_oid: The user's object ID, forwarded to the user blob manager for Data Lake Storage downloads

        Returns:
            Optional[str]: The base64 encoded data prefixed with "data:image/png;base64,", or None if the URL
            has no container segment, no suitable blob manager is configured, or the download returns nothing
        """
        from urllib.parse import unquote  # local import: only this method needs it

        # Handle full URLs for both Blob Storage and Data Lake Storage
        container: Optional[str] = None
        if blob_url.startswith("http"):
            url_parts = blob_url.split("/")
            # For blob: https://{account}.blob.core.windows.net/{container}/{blob_path}
            # For dfs: https://{account}.dfs.core.windows.net/{filesystem}/{path}
            if len(url_parts) < 4 or not url_parts[3]:
                # No container/filesystem segment in the URL, so there is nothing to download
                return None
            container = url_parts[3]
            # Everything after the container/filesystem segment is the blob path.
            # Decode every percent-escape (not just %20) so names with other encoded characters resolve.
            blob_path = unquote("/".join(url_parts[4:]))
        else:
            # Treat as a direct blob path
            blob_path = blob_url

        # Download the blob using the appropriate client
        result = None
        if ".dfs.core.windows.net" in blob_url and self.user_blob_manager:
            result = await self.user_blob_manager.download_blob(blob_path, user_oid=user_oid, container=container)
        elif self.global_blob_manager:
            result = await self.global_blob_manager.download_blob(blob_path, container=container)

        if result:
            content, _ = result  # Unpack the tuple, ignoring properties
            img = base64.b64encode(content).decode("utf-8")
            return f"data:image/png;base64,{img}"
        return None

    async def compute_text_embedding(self, q: str):
        """Embed ``q`` with the configured text embedding model and wrap it in a VectorizedQuery.

        The ``dimensions`` argument is sent only for models flagged as supporting it in the
        table below. A model name missing from that table raises KeyError.
        """

        class ExtraArgs(TypedDict, total=False):
            dimensions: int

        # Model name -> whether the embeddings call accepts a `dimensions` argument
        SUPPORTED_DIMENSIONS_MODEL = {
            "text-embedding-ada-002": False,
            "text-embedding-3-small": True,
            "text-embedding-3-large": True,
        }

        dimensions_args: ExtraArgs = {}
        if SUPPORTED_DIMENSIONS_MODEL[self.embedding_model]:
            dimensions_args = {"dimensions": self.embedding_dimensions}

        # Azure OpenAI takes the deployment name as the model name
        model_name = self.embedding_deployment or self.embedding_model
        response = await self.openai_client.embeddings.create(model=model_name, input=q, **dimensions_args)
        # This performs an oversampling due to how the search index was setup,
        # so we do not need to explicitly pass in an oversampling parameter here
        return VectorizedQuery(vector=response.data[0].embedding, k=50, fields=self.embedding_field)

    async def compute_multimodal_embedding(self, q: str):
        """Embed ``q`` with the image embeddings client and target the "images/embedding" field.

        Raises:
            ValueError: if no image embeddings client is configured on this approach.
        """
        client = self.image_embeddings_client
        if client:
            vector = await client.create_embedding_for_text(q)
            return VectorizedQuery(vector=vector, k=50, fields="images/embedding")
        raise ValueError("Approach is missing an image embeddings client for multimodal queries")

    def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[str, str]:
        """Translate a client-supplied prompt override into prompt template variables.

        Returns an empty dict when there is no override, ``{"injected_prompt": ...}`` (marker
        stripped) when the override starts with ">>>", and ``{"override_prompt": ...}`` otherwise.
        """
        if override_prompt is None:
            return {}
        injection_marker = ">>>"
        if not override_prompt.startswith(injection_marker):
            # No marker: the client replaces the entire prompt
            return {"override_prompt": override_prompt}
        # Marker present: the remainder is injected into the existing prompt
        return {"injected_prompt": override_prompt[len(injection_marker) :]}

    def get_response_token_limit(self, model: str, default_limit: int) -> int:
        """Return RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT for models listed in GPT_REASONING_MODELS,
        otherwise ``default_limit``."""
        is_reasoning_model = model in self.GPT_REASONING_MODELS
        return self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT if is_reasoning_model else default_limit

    def get_lowest_reasoning_effort(self, model: str) -> ChatCompletionReasoningEffort:
        """
        Return the lowest valid reasoning_effort for the given model.

        Models not listed in GPT_REASONING_MODELS yield None; listed models yield "minimal"
        when their entry has minimal_effort set, and "low" otherwise.
        """
        reasoning_models = self.GPT_REASONING_MODELS
        if model in reasoning_models:
            return "minimal" if reasoning_models[model].minimal_effort else "low"
        return None

    def create_chat_completion(
        self,
        chatgpt_deployment: Optional[str],
        chatgpt_model: str,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        response_token_limit: int,
        should_stream: bool = False,
        tools: Optional[list[ChatCompletionToolParam]] = None,
        temperature: Optional[float] = None,
        n: Optional[int] = None,
        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
    ) -> Awaitable[ChatCompletion] | Awaitable[AsyncStream[ChatCompletionChunk]]:
        """Build the request parameters for a chat completion and start the call.

        Args:
            chatgpt_deployment: Deployment name; used as the ``model`` argument when set.
            chatgpt_model: Model name; selects the reasoning or non-reasoning parameter set.
            messages: Conversation messages to send.
            overrides: Per-request overrides; "temperature", "reasoning_effort" and "seed" are read.
            response_token_limit: Sent as ``max_completion_tokens`` for reasoning models and
                ``max_tokens`` otherwise.
            should_stream: Request a streamed response. For reasoning models this is honored only
                when the model's entry in GPT_REASONING_MODELS has ``streaming`` set.
            tools: Optional tool definitions to include in the request.
            temperature: Explicit temperature for non-reasoning models. An explicit 0.0 is honored;
                None falls back to overrides["temperature"], then 0.3.
            n: Number of completions to request (defaults to 1).
            reasoning_effort: Explicit reasoning effort for reasoning models; falls back to
                overrides["reasoning_effort"], then self.reasoning_effort.

        Returns:
            The un-awaited result of ``self.openai_client.chat.completions.create(...)``.
        """
        params: dict[str, Any]
        if chatgpt_model in self.GPT_REASONING_MODELS:
            supported_features = self.GPT_REASONING_MODELS[chatgpt_model]
            params = {
                # max_tokens is not supported
                "max_completion_tokens": response_token_limit
            }
            params["reasoning_effort"] = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
            # Only stream when this reasoning model supports it
            stream_response = bool(should_stream and supported_features.streaming)
        else:
            # Include parameters that may not be supported for reasoning models
            params = {
                "max_tokens": response_token_limit,
                # `is not None` rather than `or`: 0.0 is a valid explicit temperature
                "temperature": temperature if temperature is not None else overrides.get("temperature", 0.3),
            }
            stream_response = should_stream

        if stream_response:
            params["stream"] = True
            params["stream_options"] = {"include_usage": True}

        if tools is not None:
            params["tools"] = tools

        # Azure OpenAI takes the deployment name as the model name
        seed_value: Optional[int] = overrides.get("seed", None)
        return self.openai_client.chat.completions.create(  # type: ignore[no-matching-overload]
            model=chatgpt_deployment if chatgpt_deployment else chatgpt_model,
            messages=messages,
            seed=seed_value,
            n=n or 1,
            **params,
        )

    def format_thought_step_for_chatcompletion(
        self,
        title: str,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        model: str,
        deployment: Optional[str],
        usage: Optional[CompletionUsage] = None,
        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
    ) -> ThoughtStep:
        """Create a ThoughtStep describing a chat completion call.

        Args:
            title: Title of the thought step.
            messages: Messages that were sent; stored as the step's description.
            overrides: Per-request overrides; "reasoning_effort" is read for reasoning models.
            model: Model name, always recorded in the step properties.
            deployment: Deployment name, recorded only when set.
            usage: Optional completion usage, recorded as "token_usage" when provided.
            reasoning_effort: Explicit reasoning effort used for the call, if any.

        Returns:
            ThoughtStep with the title, messages and the collected properties.
        """
        properties: dict[str, Any] = {"model": model}
        if deployment:
            properties["deployment"] = deployment
        # Only add reasoning_effort setting if the model supports it
        if model in self.GPT_REASONING_MODELS:
            # Same fallback chain create_chat_completion uses, so the step reports the value that
            # is actually sent even when the override is present but empty/None.
            properties["reasoning_effort"] = (
                reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
            )
        if usage:
            properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
        return ThoughtStep(title, messages, properties)

    async def run(
        self,
        messages: list[ChatCompletionMessageParam],
        session_state: Any = None,
        context: dict[str, Any] = {},
    ) -> dict[str, Any]:
        """Non-streaming entry point; this implementation always raises NotImplementedError.

        NOTE(review): presumably overridden by concrete approaches - confirm against subclasses.
        NOTE(review): ``context`` defaults to a shared mutable dict; it is not touched here,
        but overriding implementations should avoid mutating it.
        """
        raise NotImplementedError

    async def run_stream(
        self,
        messages: list[ChatCompletionMessageParam],
        session_state: Any = None,
        context: dict[str, Any] = {},
    ) -> AsyncGenerator[dict[str, Any], None]:
        """Streaming entry point; this implementation always raises NotImplementedError.

        NOTE(review): presumably overridden by concrete approaches - confirm against subclasses.
        NOTE(review): ``context`` defaults to a shared mutable dict; it is not touched here,
        but overriding implementations should avoid mutating it.
        """
        raise NotImplementedError


================================================
FILE: app/backend/approaches/chatreadretrieveread.py
================================================
import re
from collections.abc import AsyncGenerator, Awaitable
from dataclasses import asdict
from typing import Any, Optional, cast

from azure.search.documents.aio import SearchClient
from azure.search.documents.knowledgebases.aio import KnowledgeBaseRetrievalClient
from azure.search.documents.models import VectorQuery
from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_message import ChatCompletionMessage

from approaches.approach import (
    Approach,
    ExtraInfo,
    ThoughtStep,
)
from approaches.promptmanager import PromptManager
from prepdocslib.blobmanager import AdlsBlobManager, BlobManager
from prepdocslib.embeddings import ImageEmbeddings


class ChatReadRetrieveReadApproach(Approach):
    """
    A multi-step approach that first uses OpenAI to turn the user's question into a search query,
    then uses Azure AI Search to retrieve relevant documents, and then sends the conversation history,
    original user question, and search results to OpenAI to generate a response.
    """

    NO_RESPONSE = Approach.QUERY_REWRITE_NO_RESPONSE

    def __init__(
        self,
        *,
        search_client: SearchClient,
        search_index_name: str,
        knowledgebase_model: Optional[str],
        knowledgebase_deployment: Optional[str],
        knowledgebase_client: Optional[KnowledgeBaseRetrievalClient],
        knowledgebase_client_with_web: Optional[KnowledgeBaseRetrievalClient] = None,
        knowledgebase_client_with_sharepoint: Optional[KnowledgeBaseRetrievalClient] = None,
        knowledgebase_client_with_web_and_sharepoint: Optional[KnowledgeBaseRetrievalClient] = None,
        openai_client: AsyncOpenAI,
        chatgpt_model: str,
        chatgpt_deployment: Optional[str],  # Not needed for non-Azure OpenAI
        embedding_deployment: Optional[str],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
        embedding_model: str,
        embedding_dimensions: int,
        embedding_field: str,
        sourcepage_field: str,
        content_field: str,
        query_language: str,
        query_speller: str,
        prompt_manager: PromptManager,
        reasoning_effort: Optional[str] = None,
        multimodal_enabled: bool = False,
        image_embeddings_client: Optional[ImageEmbeddings] = None,
        global_blob_manager: Optional[BlobManager] = None,
        user_blob_manager: Optional[AdlsBlobManager] = None,
        use_web_source: bool = False,
        use_sharepoint_source: bool = False,
        retrieval_reasoning_effort: Optional[str] = None,
    ):
        """Store the supplied clients and settings on the instance.

        Every keyword argument is saved under an attribute of the same name, except
        ``use_web_source``, which is saved as ``web_source_enabled``. The query rewrite tools
        are loaded from "chat_query_rewrite_tools.json" via the prompt manager, and
        ``include_token_usage`` is always set to True.
        """
        # Search index
        self.search_client = search_client
        self.search_index_name = search_index_name
        self.sourcepage_field = sourcepage_field
        self.content_field = content_field
        self.query_language = query_language
        self.query_speller = query_speller

        # Knowledge base (agentic retrieval) model and clients
        self.knowledgebase_model = knowledgebase_model
        self.knowledgebase_deployment = knowledgebase_deployment
        self.knowledgebase_client = knowledgebase_client
        self.knowledgebase_client_with_web = knowledgebase_client_with_web
        self.knowledgebase_client_with_sharepoint = knowledgebase_client_with_sharepoint
        self.knowledgebase_client_with_web_and_sharepoint = knowledgebase_client_with_web_and_sharepoint
        # Track whether web source retrieval is enabled for this deployment; overrides may only disable it.
        self.web_source_enabled = use_web_source
        self.use_sharepoint_source = use_sharepoint_source
        self.retrieval_reasoning_effort = retrieval_reasoning_effort

        # Chat model
        self.openai_client = openai_client
        self.chatgpt_model = chatgpt_model
        self.chatgpt_deployment = chatgpt_deployment
        self.reasoning_effort = reasoning_effort
        self.include_token_usage = True

        # Text embeddings
        self.embedding_deployment = embedding_deployment
        self.embedding_model = embedding_model
        self.embedding_dimensions = embedding_dimensions
        self.embedding_field = embedding_field

        # Prompts and the tool definitions used for query rewriting
        self.prompt_manager = prompt_manager
        self.query_rewrite_tools = prompt_manager.load_tools("chat_query_rewrite_tools.json")

        # Multimodal support and blob storage access
        self.multimodal_enabled = multimodal_enabled
        self.image_embeddings_client = image_embeddings_client
        self.global_blob_manager = global_blob_manager
        self.user_blob_manager = user_blob_manager

    def extract_followup_questions(self, content: Optional[str]):
        """Split an answer into its main text and the follow-up questions wrapped in ``<<...>>``.

        Args:
            content: Model output, or None.

        Returns:
            A tuple ``(text, questions)``: ``text`` is everything before the first "<<" (or None
            when ``content`` is None) and ``questions`` is the list of strings found between
            "<<" and the next ">>".
        """
        if content is None:
            return content, []
        answer_text = content.split("<<", 1)[0]
        # Lazy match up to the closing ">>". The previous class `[^>>]` is just `[^>]`, which
        # dropped any question containing a single ">" (e.g. "Is a > b?").
        # DOTALL keeps multi-line questions matching, as the negated class did.
        questions = re.findall(r"<<(.+?)>>", content, flags=re.DOTALL)
        return answer_text, questions

    def get_search_query(self, chat_completion: ChatCompletion, default_query: str) -> str:
        """Return the rewritten search query from a chat completion.

        Delegates to extract_rewritten_query with NO_RESPONSE as the no-response token; if that
        call raises any Exception, ``default_query`` is returned instead (best effort).
        """
        try:
            rewritten = self.extract_rewritten_query(
                chat_completion, default_query, no_response_token=self.NO_RESPONSE
            )
        except Exception:
            return default_query
        return rewritten

    async def run_without_streaming(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        auth_claims: dict[str, Any],
        session_state: Any = None,
    ) -> dict[str, Any]:
        """Run the approach to completion and return the full (non-streamed) chat response.

        Awaits run_until_final_call with ``should_stream=False``, awaits the returned chat
        coroutine, and packages the first choice's message with the collected context.

        Returns:
            Dict with "message" (content and role), "context" (thoughts, data_points with None
            values removed, followup_questions) and "session_state".
        """
        extra_info, chat_coroutine = await self.run_until_final_call(
            messages, overrides, auth_claims, should_stream=False
        )
        completion: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine)
        answer_message = completion.choices[0].message
        content = answer_message.content
        role = answer_message.role

        if overrides.get("suggest_followup_questions"):
            # Strip the follow-up questions out of the answer text and report them separately
            content, extra_info.followup_questions = self.extract_followup_questions(content)

        # Assume last thought is for generating answer
        # TODO: Update for agentic? This isn't still true?
        if self.include_token_usage and extra_info.thoughts and completion.usage:
            extra_info.thoughts[-1].update_token_usage(completion.usage)

        # Drop data point fields that were never populated
        data_points = {
            field: value for field, value in asdict(extra_info.data_points).items() if value is not None
        }
        context = {
            "thoughts": extra_info.thoughts,
            "data_points": data_points,
            "followup_questions": extra_info.followup_questions,
        }
        return {
            "message": {"content": content, "role": role},
            "context": context,
            "session_state": session_state,
        }

    async def run_with_streaming(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        auth_claims: dict[str, Any],
        session_state: Any = None,
    ) -> AsyncGenerator[dict, None]:
        extra_info, chat_coroutine = await self.run_until_final_call(
            messages, overrides, auth_claims, should_stream=True
        )
        yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}

        followup_questions_started = False
        followup_content = ""
        chat_result = await chat_coroutine

        if isinstance(chat_result, ChatCompletion):
            message = chat_result.choices[0].message
            content = message.content or ""
            role = message.role or "assistant"

            followup_questions: list[str] = []
            if overrides.get("suggest_followup_questions"):
                content, followup_questions = self.extract_followup_qu
Download .txt
gitextract_llud8udi/

├── .azdo/
│   └── pipelines/
│       └── azure-dev.yml
├── .devcontainer/
│   └── devcontainer.json
├── .gitattributes
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── ISSUE_TEMPLATE.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── agents/
│   │   ├── fixer.agent.md
│   │   └── triager.agent.md
│   ├── dependabot.yaml
│   ├── instructions/
│   │   └── bicep.instructions.md
│   ├── prompts/
│   │   └── review_pr_comments.prompt.md
│   ├── skills/
│   │   └── github-pr-inline-reply/
│   │       └── SKILL.md
│   └── workflows/
│       ├── azure-dev-validation.yaml
│       ├── azure-dev.yml
│       ├── evaluate.yaml
│       ├── frontend.yaml
│       ├── lint-markdown.yml
│       ├── nightly-jobs.yaml
│       ├── python-test.yaml
│       ├── stale-bot.yml
│       └── validate-markdown.yml
├── .gitignore
├── .markdownlint-cli2.jsonc
├── .pre-commit-config.yaml
├── .vscode/
│   ├── extensions.json
│   ├── launch.json
│   ├── settings.json
│   └── tasks.json
├── AGENTS.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── app/
│   ├── backend/
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── app.py
│   │   ├── approaches/
│   │   │   ├── __init__.py
│   │   │   ├── approach.py
│   │   │   ├── chatreadretrieveread.py
│   │   │   ├── promptmanager.py
│   │   │   └── prompts/
│   │   │       ├── chat_answer.system.jinja2
│   │   │       ├── chat_answer.user.jinja2
│   │   │       ├── chat_query_rewrite_tools.json
│   │   │       └── query_rewrite.system.jinja2
│   │   ├── chat_history/
│   │   │   ├── __init__.py
│   │   │   └── cosmosdb.py
│   │   ├── config.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── authentication.py
│   │   │   └── sessionhelper.py
│   │   ├── custom_uvicorn_worker.py
│   │   ├── decorators.py
│   │   ├── error.py
│   │   ├── gunicorn.conf.py
│   │   ├── load_azd_env.py
│   │   ├── main.py
│   │   ├── prepdocs.py
│   │   ├── prepdocslib/
│   │   │   ├── __init__.py
│   │   │   ├── blobmanager.py
│   │   │   ├── cloudingestionstrategy.py
│   │   │   ├── csvparser.py
│   │   │   ├── embeddings.py
│   │   │   ├── figureprocessor.py
│   │   │   ├── fileprocessor.py
│   │   │   ├── filestrategy.py
│   │   │   ├── htmlparser.py
│   │   │   ├── integratedvectorizerstrategy.py
│   │   │   ├── jsonparser.py
│   │   │   ├── listfilestrategy.py
│   │   │   ├── mediadescriber.py
│   │   │   ├── page.py
│   │   │   ├── parser.py
│   │   │   ├── pdfparser.py
│   │   │   ├── searchmanager.py
│   │   │   ├── servicesetup.py
│   │   │   ├── strategy.py
│   │   │   ├── textparser.py
│   │   │   ├── textprocessor.py
│   │   │   └── textsplitter.py
│   │   ├── requirements.in
│   │   ├── requirements.txt
│   │   └── setup_cloud_ingestion.py
│   ├── frontend/
│   │   ├── .npmrc
│   │   ├── .nvmrc
│   │   ├── .prettierignore
│   │   ├── .prettierrc.json
│   │   ├── index.html
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── api/
│   │   │   │   ├── api.ts
│   │   │   │   ├── index.ts
│   │   │   │   └── models.ts
│   │   │   ├── authConfig.ts
│   │   │   ├── components/
│   │   │   │   ├── AnalysisPanel/
│   │   │   │   │   ├── AgentPlan.tsx
│   │   │   │   │   ├── AnalysisPanel.module.css
│   │   │   │   │   ├── AnalysisPanel.tsx
│   │   │   │   │   ├── AnalysisPanelTabs.tsx
│   │   │   │   │   ├── ThoughtProcess.tsx
│   │   │   │   │   ├── TokenUsageGraph.tsx
│   │   │   │   │   ├── agentPlanUtils.ts
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── Answer/
│   │   │   │   │   ├── Answer.module.css
│   │   │   │   │   ├── Answer.tsx
│   │   │   │   │   ├── AnswerError.tsx
│   │   │   │   │   ├── AnswerIcon.tsx
│   │   │   │   │   ├── AnswerLoading.tsx
│   │   │   │   │   ├── AnswerParser.tsx
│   │   │   │   │   ├── SpeechOutputAzure.tsx
│   │   │   │   │   ├── SpeechOutputBrowser.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── ClearChatButton/
│   │   │   │   │   ├── ClearChatButton.module.css
│   │   │   │   │   ├── ClearChatButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── Example/
│   │   │   │   │   ├── Example.module.css
│   │   │   │   │   ├── Example.tsx
│   │   │   │   │   ├── ExampleList.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HelpCallout/
│   │   │   │   │   ├── HelpCallout.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── HistoryButton/
│   │   │   │   │   ├── HistoryButton.module.css
│   │   │   │   │   ├── HistoryButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryItem/
│   │   │   │   │   ├── HistoryItem.module.css
│   │   │   │   │   ├── HistoryItem.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryPanel/
│   │   │   │   │   ├── HistoryPanel.module.css
│   │   │   │   │   ├── HistoryPanel.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── HistoryProviders/
│   │   │   │   │   ├── CosmosDB.ts
│   │   │   │   │   ├── HistoryManager.ts
│   │   │   │   │   ├── IProvider.ts
│   │   │   │   │   ├── IndexedDB.ts
│   │   │   │   │   ├── None.ts
│   │   │   │   │   └── index.ts
│   │   │   │   ├── LoginButton/
│   │   │   │   │   ├── LoginButton.module.css
│   │   │   │   │   ├── LoginButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── MarkdownViewer/
│   │   │   │   │   ├── MarkdownViewer.module.css
│   │   │   │   │   ├── MarkdownViewer.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── QuestionInput/
│   │   │   │   │   ├── QuestionInput.module.css
│   │   │   │   │   ├── QuestionInput.tsx
│   │   │   │   │   ├── SpeechInput.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── Settings/
│   │   │   │   │   ├── Settings.module.css
│   │   │   │   │   └── Settings.tsx
│   │   │   │   ├── SettingsButton/
│   │   │   │   │   ├── SettingsButton.module.css
│   │   │   │   │   ├── SettingsButton.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── SupportingContent/
│   │   │   │   │   ├── SupportingContent.module.css
│   │   │   │   │   ├── SupportingContent.tsx
│   │   │   │   │   ├── SupportingContentParser.ts
│   │   │   │   │   └── index.ts
│   │   │   │   ├── TokenClaimsDisplay/
│   │   │   │   │   ├── TokenClaimsDisplay.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── UploadFile/
│   │   │   │   │   ├── UploadFile.module.css
│   │   │   │   │   ├── UploadFile.tsx
│   │   │   │   │   └── index.tsx
│   │   │   │   ├── UserChatMessage/
│   │   │   │   │   ├── UserChatMessage.module.css
│   │   │   │   │   ├── UserChatMessage.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   └── VectorSettings/
│   │   │   │       ├── VectorSettings.module.css
│   │   │   │       ├── VectorSettings.tsx
│   │   │   │       └── index.ts
│   │   │   ├── i18n/
│   │   │   │   ├── LanguagePicker.module.css
│   │   │   │   ├── LanguagePicker.tsx
│   │   │   │   ├── config.ts
│   │   │   │   └── index.tsx
│   │   │   ├── index.css
│   │   │   ├── index.tsx
│   │   │   ├── layoutWrapper.tsx
│   │   │   ├── locales/
│   │   │   │   ├── da/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── en/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── es/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── fr/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── it/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── ja/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── nl/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── pl/
│   │   │   │   │   └── translation.json
│   │   │   │   ├── ptBR/
│   │   │   │   │   └── translation.json
│   │   │   │   └── tr/
│   │   │   │       └── translation.json
│   │   │   ├── loginContext.tsx
│   │   │   ├── pages/
│   │   │   │   ├── NoPage.tsx
│   │   │   │   ├── chat/
│   │   │   │   │   ├── Chat.module.css
│   │   │   │   │   └── Chat.tsx
│   │   │   │   └── layout/
│   │   │   │       ├── Layout.module.css
│   │   │   │       └── Layout.tsx
│   │   │   └── vite-env.d.ts
│   │   ├── tsconfig.json
│   │   └── vite.config.ts
│   ├── functions/
│   │   ├── __init__.py
│   │   ├── document_extractor/
│   │   │   ├── .funcignore
│   │   │   ├── function_app.py
│   │   │   └── host.json
│   │   ├── figure_processor/
│   │   │   ├── .funcignore
│   │   │   ├── function_app.py
│   │   │   └── host.json
│   │   └── text_processor/
│   │       ├── .funcignore
│   │       ├── function_app.py
│   │       └── host.json
│   ├── start.ps1
│   └── start.sh
├── azure.yaml
├── data/
│   ├── Json_Examples/
│   │   ├── 2189.json
│   │   ├── 2190.json
│   │   ├── 2191.json
│   │   ├── 2192.json
│   │   └── query.json
│   └── Zava_Company_Overview.md
├── docs/
│   ├── README.md
│   ├── agentic_retrieval.md
│   ├── appservice.md
│   ├── architecture.md
│   ├── azd.md
│   ├── azure_app_service.md
│   ├── azure_container_apps.md
│   ├── customization.md
│   ├── data_ingestion.md
│   ├── deploy_existing.md
│   ├── deploy_features.md
│   ├── deploy_freetrial.md
│   ├── deploy_lowcost.md
│   ├── deploy_private.md
│   ├── deploy_troubleshooting.md
│   ├── evaluation.md
│   ├── http_protocol.md
│   ├── localdev.md
│   ├── login_and_acl.md
│   ├── monitoring.md
│   ├── multimodal.md
│   ├── other_samples.md
│   ├── productionizing.md
│   ├── reasoning.md
│   ├── safety_evaluation.md
│   ├── sharing_environments.md
│   └── textsplitter.md
├── evals/
│   ├── evaluate.py
│   ├── evaluate_config.json
│   ├── evaluate_config_multimodal.json
│   ├── generate_ground_truth.py
│   ├── ground_truth.jsonl
│   ├── ground_truth_kg.json
│   ├── ground_truth_multimodal.jsonl
│   ├── requirements.txt
│   ├── results/
│   │   ├── baseline/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt35turbo-ada002/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-ada002/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-emb3l/
│   │   │   ├── README.md
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt4omini-emb3l-2/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5chat-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5mini-emb3l/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── gpt5mini-emb3l-2/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   └── o3mini-ada002/
│   │       ├── config.json
│   │       ├── eval_results.jsonl
│   │       ├── evaluate_parameters.json
│   │       └── summary.json
│   ├── results_multimodal/
│   │   ├── baseline/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   ├── no-image-embeddings/
│   │   │   ├── config.json
│   │   │   ├── eval_results.jsonl
│   │   │   ├── evaluate_parameters.json
│   │   │   └── summary.json
│   │   └── no-image-sources/
│   │       ├── config.json
│   │       ├── eval_results.jsonl
│   │       ├── evaluate_parameters.json
│   │       └── summary.json
│   ├── safety_evaluation.py
│   └── safety_results.json
├── infra/
│   ├── abbreviations.json
│   ├── app/
│   │   ├── functions-app.bicep
│   │   ├── functions-rbac.bicep
│   │   ├── functions.bicep
│   │   └── storage-containers.bicep
│   ├── backend-dashboard.bicep
│   ├── bicepconfig.json
│   ├── core/
│   │   ├── ai/
│   │   │   ├── ai-environment.bicep
│   │   │   ├── hub.bicep
│   │   │   └── project.bicep
│   │   ├── auth/
│   │   │   └── appregistration.bicep
│   │   ├── host/
│   │   │   ├── appservice-appsettings.bicep
│   │   │   ├── appservice.bicep
│   │   │   ├── appserviceplan.bicep
│   │   │   ├── container-app-upsert.bicep
│   │   │   ├── container-app.bicep
│   │   │   ├── container-apps-auth.bicep
│   │   │   ├── container-apps-environment.bicep
│   │   │   ├── container-apps.bicep
│   │   │   └── container-registry.bicep
│   │   ├── monitor/
│   │   │   └── monitoring.bicep
│   │   ├── networking/
│   │   │   ├── private-dns-zones.bicep
│   │   │   ├── private-endpoint.bicep
│   │   │   └── vnet.bicep
│   │   ├── search/
│   │   │   ├── search-diagnostics.bicep
│   │   │   └── search-services.bicep
│   │   ├── security/
│   │   │   ├── aca-identity.bicep
│   │   │   ├── documentdb-sql-role.bicep
│   │   │   ├── registry-access.bicep
│   │   │   ├── role.bicep
│   │   │   └── storage-role.bicep
│   │   └── storage/
│   │       └── storage-account.bicep
│   ├── main.bicep
│   ├── main.parameters.json
│   ├── main.test.bicep
│   ├── network-isolation.bicep
│   └── private-endpoints.bicep
├── locustfile.py
├── ps-rule.yaml
├── pyproject.toml
├── requirements-dev.txt
├── scripts/
│   ├── adlsgen2setup.py
│   ├── auth_common.py
│   ├── auth_init.ps1
│   ├── auth_init.py
│   ├── auth_init.sh
│   ├── auth_update.ps1
│   ├── auth_update.py
│   ├── auth_update.sh
│   ├── copy_prepdocslib.py
│   ├── cosmosdb_migration.py
│   ├── load-balance-aca-setup.sh
│   ├── load_azd_env.py
│   ├── load_python_env.ps1
│   ├── load_python_env.sh
│   ├── manageacl.py
│   ├── prepdocs.ps1
│   ├── prepdocs.sh
│   ├── roles.ps1
│   ├── roles.sh
│   ├── sampleacls.json
│   ├── setup_cloud_ingestion.ps1
│   ├── setup_cloud_ingestion.sh
│   └── verify_search_index_acls.py
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── e2e.py
    ├── mocks.py
    ├── snapshots/
    │   ├── test_app/
    │   │   ├── test_chat_followup/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception_contentsafety/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_handle_exception_contentsafety_streaming/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_handle_exception_streaming/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_hybrid/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_hybrid_semantic_captions/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_hybrid_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_prompt_template/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_prompt_template_concat/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_seed/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_session_state_persists/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_stream_followup/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_handle_exception/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_stream_session_state_persists/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text_filter/
    │   │   │   └── auth_client0/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_text_reasoning/
    │   │   │   ├── reasoning_client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── reasoning_client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_stream_vision/
    │   │   │   └── client0/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_text/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_agent/
    │   │   │   ├── knowledgebase_client0/
    │   │   │   │   └── result.json
    │   │   │   ├── knowledgebase_client1_web/
    │   │   │   │   └── result.json
    │   │   │   └── knowledgebase_client2_sharepoint/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter/
    │   │   │   └── auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter_agent/
    │   │   │   └── knowledgebase_auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_filter_public_documents/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_reasoning/
    │   │   │   ├── reasoning_client0/
    │   │   │   │   └── result.json
    │   │   │   └── reasoning_client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semanticcaptions/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_text_semanticranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vector/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vector_semantic_ranker/
    │   │   │   ├── client0/
    │   │   │   │   └── result.json
    │   │   │   └── client1/
    │   │   │       └── result.json
    │   │   ├── test_chat_vision/
    │   │   │   ├── client0/
    │   │   │   │   ├── result.json
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   ├── test_chat_vision_user/
    │   │   │   └── auth_client0/
    │   │   │       └── result.json
    │   │   ├── test_chat_vision_vectors/
    │   │   │   ├── client0/
    │   │   │   │   └── result.jsonlines
    │   │   │   └── client1/
    │   │   │       └── result.jsonlines
    │   │   └── test_chat_with_history/
    │   │       ├── client0/
    │   │       │   └── result.json
    │   │       └── client1/
    │   │           └── result.json
    │   ├── test_authenticationhelper/
    │   │   ├── test_auth_setup/
    │   │   │   └── result.json
    │   │   ├── test_auth_setup_required_access_control/
    │   │   │   └── result.json
    │   │   └── test_auth_setup_required_access_control_and_unauthenticated_access/
    │   │       └── result.json
    │   ├── test_cosmosdb/
    │   │   ├── test_chathistory_getitem/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   ├── test_chathistory_query/
    │   │   │   └── auth_public_documents_client0/
    │   │   │       └── result.json
    │   │   └── test_chathistory_query_continuation/
    │   │       └── auth_public_documents_client0/
    │   │           └── result.json
    │   └── test_prepdocslib_textsplitter/
    │       ├── test_pages_with_figures/
    │       │   ├── pages_with_figures.json/
    │       │   │   └── split_pages_with_figures.json
    │       │   └── pages_with_just_text.json/
    │       │       └── split_pages_with_figures.json
    │       └── test_sentencetextsplitter_list_parse_and_split/
    │           └── text_splitter_sections.txt
    ├── test-data/
    │   ├── Simple Figure_content.txt
    │   ├── Simple Table_content.txt
    │   ├── pages_with_figures.json
    │   └── pages_with_just_text.json
    ├── test_adlsgen2setup.py
    ├── test_agentic_retrieval.py
    ├── test_app.py
    ├── test_app_config.py
    ├── test_auth_init.py
    ├── test_authenticationhelper.py
    ├── test_blob_manager.py
    ├── test_chatapproach.py
    ├── test_content_file.py
    ├── test_cosmosdb.py
    ├── test_cosmosdb_migration.py
    ├── test_csvparser.py
    ├── test_function_apps.py
    ├── test_htmlparser.py
    ├── test_jsonparser.py
    ├── test_listfilestrategy.py
    ├── test_manageacl.py
    ├── test_mediadescriber.py
    ├── test_pdfparser.py
    ├── test_prepdocs.py
    ├── test_prepdocslib_filestrategy.py
    ├── test_prepdocslib_textsplitter.py
    ├── test_searchmanager.py
    ├── test_sentencetextsplitter.py
    ├── test_servicesetup.py
    ├── test_textparser.py
    ├── test_textprocessor.py
    └── test_upload.py
Download .txt
SYMBOL INDEX (1056 symbols across 119 files)

FILE: app/backend/app.py
  function index (line 115) | async def index():
  function redirect (line 122) | async def redirect():
  function favicon (line 127) | async def favicon():
  function assets (line 132) | async def assets(path):
  function content_file (line 138) | async def content_file(path: str, auth_claims: dict[str, Any]):
  class JSONEncoder (line 183) | class JSONEncoder(json.JSONEncoder):
    method default (line 184) | def default(self, o):
  function format_as_ndjson (line 197) | async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenera...
  function chat (line 208) | async def chat(auth_claims: dict[str, Any]):
  function chat_stream (line 237) | async def chat_stream(auth_claims: dict[str, Any]):
  function auth_setup (line 269) | def auth_setup():
  function config (line 275) | def config():
  function speech (line 305) | async def speech():
  function upload (line 350) | async def upload(auth_claims: dict[str, Any]):
  function delete_uploaded (line 370) | async def delete_uploaded(auth_claims: dict[str, Any]):
  function list_uploaded (line 383) | async def list_uploaded(auth_claims: dict[str, Any]):
  function setup_clients (line 394) | async def setup_clients():
  function close_clients (line 739) | async def close_clients():
  function create_app (line 747) | def create_app():

FILE: app/backend/approaches/approach.py
  class ActivityDetail (line 54) | class ActivityDetail:
  class Document (line 63) | class Document:
    method serialize_for_results (line 78) | def serialize_for_results(self) -> dict[str, Any]:
  class WebResult (line 109) | class WebResult:
    method serialize_for_results (line 115) | def serialize_for_results(self) -> dict[str, Any]:
  class SharePointResult (line 127) | class SharePointResult:
    method serialize_for_results (line 135) | def serialize_for_results(self) -> dict[str, Any]:
  class RewriteQueryResult (line 149) | class RewriteQueryResult:
  class ThoughtStep (line 157) | class ThoughtStep:
    method update_token_usage (line 162) | def update_token_usage(self, usage: CompletionUsage) -> None:
  class AgenticRetrievalResults (line 168) | class AgenticRetrievalResults:
  class DataPoints (line 182) | class DataPoints:
  class ExtraInfo (line 191) | class ExtraInfo:
  class TokenUsageProps (line 199) | class TokenUsageProps:
    method from_completion_usage (line 206) | def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageP...
  class GPTReasoningModelSupport (line 220) | class GPTReasoningModelSupport:
  class Approach (line 225) | class Approach(ABC):
    method __init__ (line 241) | def __init__(
    method build_filter (line 285) | def build_filter(self, overrides: dict[str, Any]) -> Optional[str]:
    method search (line 295) | async def search(
    method extract_rewritten_query (line 366) | def extract_rewritten_query(
    method rewrite_query (line 394) | async def rewrite_query(
    method run_agentic_retrieval (line 438) | async def run_agentic_retrieval(
    method replace_all_ref_ids (line 692) | def replace_all_ref_ids(
    method get_sources_content (line 721) | async def get_sources_content(
    method get_citation (line 831) | def get_citation(self, sourcepage: Optional[str]):
    method get_image_citation (line 834) | def get_image_citation(self, sourcepage: Optional[str], image_url: str):
    method download_blob_as_base64 (line 839) | async def download_blob_as_base64(self, blob_url: str, user_oid: Optio...
    method compute_text_embedding (line 880) | async def compute_text_embedding(self, q: str):
    method compute_multimodal_embedding (line 904) | async def compute_multimodal_embedding(self, q: str):
    method get_system_prompt_variables (line 910) | def get_system_prompt_variables(self, override_prompt: Optional[str]) ...
    method get_response_token_limit (line 919) | def get_response_token_limit(self, model: str, default_limit: int) -> ...
    method get_lowest_reasoning_effort (line 925) | def get_lowest_reasoning_effort(self, model: str) -> ChatCompletionRea...
    method create_chat_completion (line 935) | def create_chat_completion(
    method format_thought_step_for_chatcompletion (line 984) | def format_thought_step_for_chatcompletion(
    method run (line 1006) | async def run(
    method run_stream (line 1014) | async def run_stream(

FILE: app/backend/approaches/chatreadretrieveread.py
  class ChatReadRetrieveReadApproach (line 28) | class ChatReadRetrieveReadApproach(Approach):
    method __init__ (line 37) | def __init__(
    method extract_followup_questions (line 101) | def extract_followup_questions(self, content: Optional[str]):
    method get_search_query (line 106) | def get_search_query(self, chat_completion: ChatCompletion, default_qu...
    method run_without_streaming (line 113) | async def run_without_streaming(
    method run_with_streaming (line 146) | async def run_with_streaming(
    method run (line 232) | async def run(
    method run_stream (line 242) | async def run_stream(
    method run_until_final_call (line 252) | async def run_until_final_call(
    method run_search_approach (line 338) | async def run_search_approach(
    method run_agentic_retrieval_approach (line 451) | async def run_agentic_retrieval_approach(
    method _select_knowledgebase_client (line 509) | def _select_knowledgebase_client(

FILE: app/backend/approaches/promptmanager.py
  class PromptManager (line 14) | class PromptManager:
    method __init__ (line 19) | def __init__(self):
    method build_system_prompt (line 27) | def build_system_prompt(
    method build_user_prompt (line 42) | def build_user_prompt(
    method build_conversation (line 66) | def build_conversation(
    method load_tools (line 102) | def load_tools(self, path: str) -> list[ChatCompletionToolParam]:

FILE: app/backend/chat_history/cosmosdb.py
  function post_chat_history (line 24) | async def post_chat_history(auth_claims: dict[str, Any]):
  function get_chat_history_sessions (line 81) | async def get_chat_history_sessions(auth_claims: dict[str, Any]):
  function get_chat_history_session (line 134) | async def get_chat_history_session(auth_claims: dict[str, Any], session_...
  function delete_chat_history_session (line 174) | async def delete_chat_history_session(auth_claims: dict[str, Any], sessi...
  function setup_clients (line 206) | async def setup_clients():
  function close_clients (line 234) | async def close_clients():

FILE: app/backend/core/authentication.py
  class AuthError (line 25) | class AuthError(Exception):
    method __init__ (line 26) | def __init__(self, error, status_code):
    method __str__ (line 30) | def __str__(self) -> str:
  class AuthenticationHelper (line 34) | class AuthenticationHelper:
    method __init__ (line 37) | def __init__(
    method get_auth_setup_for_client (line 77) | def get_auth_setup_for_client(self) -> dict[str, Any]:
    method get_token_auth_header (line 114) | def get_token_auth_header(headers: dict) -> str:
    method get_auth_claims_if_enabled (line 138) | async def get_auth_claims_if_enabled(self, headers: dict) -> dict[str,...
    method check_path_auth (line 176) | async def check_path_auth(self, path: str, auth_claims: dict[str, Any]...
    method create_pem_format (line 205) | async def create_pem_format(self, jwks, token):
    method validate_access_token (line 224) | async def validate_access_token(self, token: str):

FILE: app/backend/core/sessionhelper.py
  function create_session_id (line 5) | def create_session_id(

FILE: app/backend/custom_uvicorn_worker.py
  class CustomUvicornWorker (line 44) | class CustomUvicornWorker(UvicornWorker):

FILE: app/backend/decorators.py
  function authenticated_path (line 13) | def authenticated_path(route_fn: Callable[[str, dict[str, Any]], Any]):
  function authenticated (line 44) | def authenticated(route_fn: _C) -> _C:

FILE: app/backend/error.py
  function error_dict (line 15) | def error_dict(error: Exception) -> dict:
  function error_response (line 23) | def error_response(error: Exception, route: str, status_code: int = 500):

FILE: app/backend/load_azd_env.py
  function load_azd_env (line 11) | def load_azd_env():

FILE: app/backend/prepdocs.py
  function check_search_service_connectivity (line 37) | async def check_search_service_connectivity(search_service: str) -> bool:
  function setup_list_file_strategy (line 50) | def setup_list_file_strategy(
  function setup_file_processors (line 62) | def setup_file_processors(
  function main (line 102) | async def main(strategy: Strategy, setup_index: bool = True):

FILE: app/backend/prepdocslib/blobmanager.py
  class BlobProperties (line 23) | class BlobProperties(TypedDict, total=False):
  class BaseBlobManager (line 29) | class BaseBlobManager:
    method sourcepage_from_file_page (line 35) | def sourcepage_from_file_page(cls, filename, page=0) -> str:
    method blob_name_from_file_name (line 42) | def blob_name_from_file_name(cls, filename) -> str:
    method add_image_citation (line 46) | def add_image_citation(
    method upload_document_image (line 93) | async def upload_document_image(
    method download_blob (line 103) | async def download_blob(
  class AdlsBlobManager (line 123) | class AdlsBlobManager(BaseBlobManager):
    method __init__ (line 130) | def __init__(self, endpoint: str, container: str, credential: AsyncTok...
    method close_clients (line 148) | async def close_clients(self):
    method _ensure_directory (line 151) | async def _ensure_directory(self, directory_path: str, user_oid: str) ...
    method upload_blob (line 173) | async def upload_blob(self, file: File | IO, filename: str, user_oid: ...
    method _get_image_directory_path (line 208) | def _get_image_directory_path(self, document_filename: str, user_oid: ...
    method upload_document_image (line 224) | async def upload_document_image(
    method download_blob (line 258) | async def download_blob(
    method remove_blob (line 316) | async def remove_blob(self, filename: str, user_oid: str) -> None:
    method list_blobs (line 349) | async def list_blobs(self, user_oid: str) -> list[str]:
  class BlobManager (line 389) | class BlobManager(BaseBlobManager):
    method __init__ (line 394) | def __init__(
    method close_clients (line 415) | async def close_clients(self):
    method get_managedidentity_connectionstring (line 418) | def get_managedidentity_connectionstring(self):
    method upload_blob (line 423) | async def upload_blob(self, file: File) -> str:
    method upload_document_image (line 441) | async def upload_document_image(
    method download_blob (line 466) | async def download_blob(
    method remove_blob (line 526) | async def remove_blob(self, path: Optional[str] = None):

FILE: app/backend/prepdocslib/cloudingestionstrategy.py
  class SkillConfig (line 42) | class SkillConfig:
  class CloudIngestionStrategy (line 51) | class CloudIngestionStrategy(Strategy):  # pragma: no cover
    method __init__ (line 54) | def __init__(
    method _build_skillset (line 117) | def _build_skillset(self) -> SearchIndexerSkillset:
    method setup (line 287) | async def setup(self) -> None:
    method run (line 362) | async def run(self) -> None:

FILE: app/backend/prepdocslib/csvparser.py
  class CsvParser (line 9) | class CsvParser(Parser):
    method parse (line 14) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:

FILE: app/backend/prepdocslib/embeddings.py
  class EmbeddingBatch (line 20) | class EmbeddingBatch:
    method __init__ (line 23) | def __init__(self, texts: list[str], token_length: int):
  class ExtraArgs (line 28) | class ExtraArgs(TypedDict, total=False):
  class OpenAIEmbeddings (line 32) | class OpenAIEmbeddings(ABC):
    method __init__ (line 46) | def __init__(
    method _api_model (line 64) | def _api_model(self) -> str:
    method before_retry_sleep (line 67) | def before_retry_sleep(self, retry_state):
    method calculate_token_length (line 70) | def calculate_token_length(self, text: str):
    method split_text_into_batches (line 74) | def split_text_into_batches(self, texts: list[str]) -> list[EmbeddingB...
    method create_embedding_batch (line 105) | async def create_embedding_batch(self, texts: list[str], dimensions_ar...
    method create_embedding_single (line 128) | async def create_embedding_single(self, text: str, dimensions_args: Ex...
    method create_embeddings (line 143) | async def create_embeddings(self, texts: list[str]) -> list[list[float]]:
  class ImageEmbeddings (line 157) | class ImageEmbeddings:
    method __init__ (line 163) | def __init__(self, endpoint: str, token_provider: Callable[[], Awaitab...
    method create_embedding_for_image (line 167) | async def create_embedding_for_image(self, image_bytes: bytes) -> list...
    method create_embedding_for_text (line 185) | async def create_embedding_for_text(self, q: str):
    method before_retry_sleep (line 200) | def before_retry_sleep(self, retry_state):

FILE: app/backend/prepdocslib/figureprocessor.py
  class MediaDescriptionStrategy (line 22) | class MediaDescriptionStrategy(Enum):
  class FigureProcessor (line 30) | class FigureProcessor:
    method __init__ (line 33) | def __init__(
    method get_media_describer (line 52) | async def get_media_describer(self) -> MediaDescriber | None:
    method mark_content_understanding_ready (line 84) | def mark_content_understanding_ready(self) -> None:
    method describe (line 89) | async def describe(self, image_bytes: bytes) -> str | None:
  function build_figure_markup (line 101) | def build_figure_markup(image: "ImageOnPage", description: Optional[str]...
  function process_page_image (line 113) | async def process_page_image(

FILE: app/backend/prepdocslib/fileprocessor.py
  class FileProcessor (line 8) | class FileProcessor:

FILE: app/backend/prepdocslib/filestrategy.py
  function parse_file (line 21) | async def parse_file(
  class FileStrategy (line 53) | class FileStrategy(Strategy):
    method __init__ (line 58) | def __init__(
    method setup_search_manager (line 92) | def setup_search_manager(self):
    method setup (line 106) | async def setup(self):
    method run (line 119) | async def run(self):
  class UploadUserFileStrategy (line 149) | class UploadUserFileStrategy:
    method __init__ (line 154) | def __init__(
    method add_file (line 183) | async def add_file(self, file: File, user_oid: str):
    method remove_file (line 196) | async def remove_file(self, filename: str, oid: str):

FILE: app/backend/prepdocslib/htmlparser.py
  function cleanup_data (line 14) | def cleanup_data(data: str) -> str:
  class LocalHTMLParser (line 31) | class LocalHTMLParser(Parser):
    method parse (line 34) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:

FILE: app/backend/prepdocslib/integratedvectorizerstrategy.py
  class IntegratedVectorizerStrategy (line 32) | class IntegratedVectorizerStrategy(Strategy):  # pragma: no cover
    method __init__ (line 37) | def __init__(
    method create_embedding_skill (line 70) | async def create_embedding_skill(self, index_name: str) -> SearchIndex...
    method setup (line 136) | async def setup(self):
    method run (line 168) | async def run(self):

FILE: app/backend/prepdocslib/jsonparser.py
  class JsonParser (line 9) | class JsonParser(Parser):
    method parse (line 14) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:

FILE: app/backend/prepdocslib/listfilestrategy.py
  class File (line 14) | class File:
    method __init__ (line 20) | def __init__(self, content: IO, acls: Optional[dict[str, list]] = None...
    method filename (line 25) | def filename(self) -> str:
    method file_extension (line 52) | def file_extension(self):
    method filename_to_id (line 55) | def filename_to_id(self):
    method close (line 63) | def close(self):
  class ListFileStrategy (line 68) | class ListFileStrategy(ABC):
    method list (line 73) | async def list(self) -> AsyncGenerator[File, None]:
    method list_paths (line 77) | async def list_paths(self) -> AsyncGenerator[str, None]:
  class LocalListFileStrategy (line 82) | class LocalListFileStrategy(ListFileStrategy):
    method __init__ (line 87) | def __init__(self, path_pattern: str, enable_global_documents: bool = ...
    method list_paths (line 91) | async def list_paths(self) -> AsyncGenerator[str, None]:
    method _list_paths (line 95) | async def _list_paths(self, path_pattern: str) -> AsyncGenerator[str, ...
    method list (line 104) | async def list(self) -> AsyncGenerator[File, None]:
    method check_md5 (line 110) | def check_md5(self, path: str) -> bool:

FILE: app/backend/prepdocslib/mediadescriber.py
  class MediaDescriber (line 23) | class MediaDescriber(ABC):
    method describe_image (line 25) | async def describe_image(self, image_bytes) -> str:
  class ContentUnderstandingDescriber (line 29) | class ContentUnderstandingDescriber(MediaDescriber):
    method __init__ (line 51) | def __init__(self, endpoint: str, credential: AsyncTokenCredential):
    method poll_api (line 55) | async def poll_api(self, session, poll_url, headers):
    method create_analyzer (line 70) | async def create_analyzer(self):
    method describe_image (line 96) | async def describe_image(self, image_bytes: bytes) -> str:
  class MultimodalModelDescriber (line 119) | class MultimodalModelDescriber(MediaDescriber):
    method __init__ (line 120) | def __init__(self, openai_client: AsyncOpenAI, model: str, deployment:...
    method describe_image (line 125) | async def describe_image(self, image_bytes: bytes) -> str:

FILE: app/backend/prepdocslib/page.py
  class ImageOnPage (line 7) | class ImageOnPage:
    method to_skill_payload (line 20) | def to_skill_payload(
    method from_skill_payload (line 40) | def from_skill_payload(cls, data: dict[str, Any]) -> tuple["ImageOnPag...
  class Page (line 92) | class Page:
  class Chunk (line 110) | class Chunk:

FILE: app/backend/prepdocslib/parser.py
  class Parser (line 8) | class Parser(ABC):
    method parse (line 13) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:

FILE: app/backend/prepdocslib/pdfparser.py
  class LocalPdfParser (line 29) | class LocalPdfParser(Parser):
    method parse (line 35) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:
  class DocumentAnalysisParser (line 47) | class DocumentAnalysisParser(Parser):
    method __init__ (line 53) | def __init__(
    method parse (line 65) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:
    method figure_to_image (line 197) | async def figure_to_image(doc: pymupdf.Document, figure: DocumentFigur...
    method table_to_html (line 239) | def table_to_html(table: DocumentTable):
    method crop_image_from_pdf_page (line 260) | def crop_image_from_pdf_page(

FILE: app/backend/prepdocslib/searchmanager.py
  class Section (line 53) | class Section:
    method __init__ (line 58) | def __init__(self, chunk: Chunk, content: File, category: Optional[str...
  class SearchManager (line 65) | class SearchManager:
    method __init__ (line 71) | def __init__(
    method create_index (line 96) | async def create_index(self):
    method create_knowledgebase (line 484) | async def create_knowledgebase(self):
    method update_content (line 595) | async def update_content(self, sections: list[Section], url: Optional[...
    method remove_content (line 647) | async def remove_content(self, path: Optional[str] = None, only_oid: O...

FILE: app/backend/prepdocslib/servicesetup.py
  function clean_key_if_exists (line 30) | def clean_key_if_exists(key: Optional[str]) -> Optional[str]:
  class OpenAIHost (line 37) | class OpenAIHost(str, Enum):
  function setup_search_info (line 52) | def setup_search_info(
  function setup_openai_client (line 84) | def setup_openai_client(
  function setup_image_embeddings_service (line 141) | def setup_image_embeddings_service(
  function setup_embeddings_service (line 157) | def setup_embeddings_service(
  function setup_blob_manager (line 182) | def setup_blob_manager(
  function setup_figure_processor (line 212) | def setup_figure_processor(
  function build_file_processors (line 246) | def build_file_processors(
  function select_processor_for_filename (line 317) | def select_processor_for_filename(file_name: str, file_processors: dict[...

FILE: app/backend/prepdocslib/strategy.py
  class SearchInfo (line 13) | class SearchInfo:
    method __init__ (line 19) | def __init__(
    method create_search_client (line 41) | def create_search_client(self) -> SearchClient:
    method create_search_index_client (line 44) | def create_search_index_client(self) -> SearchIndexClient:
    method create_search_indexer_client (line 47) | def create_search_indexer_client(self) -> SearchIndexerClient:
  class DocumentAction (line 51) | class DocumentAction(Enum):
  class Strategy (line 57) | class Strategy(ABC):
    method setup (line 62) | async def setup(self):
    method run (line 65) | async def run(self):

FILE: app/backend/prepdocslib/textparser.py
  function cleanup_data (line 9) | def cleanup_data(data: str) -> str:
  class TextParser (line 24) | class TextParser(Parser):
    method parse (line 27) | async def parse(self, content: IO) -> AsyncGenerator[Page, None]:

FILE: app/backend/prepdocslib/textprocessor.py
  function combine_text_with_figures (line 14) | def combine_text_with_figures(page: "Page") -> None:
  function process_text (line 27) | def process_text(

FILE: app/backend/prepdocslib/textsplitter.py
  class TextSplitter (line 15) | class TextSplitter(ABC):
    method split_pages (line 22) | def split_pages(self, pages: list[Page]) -> Generator[Chunk, None, None]:
  function _safe_concat (line 88) | def _safe_concat(a: str, b: str) -> str:
  function _normalize_chunk (line 111) | def _normalize_chunk(text: str, max_chars: int) -> str:
  class _ChunkBuilder (line 133) | class _ChunkBuilder:
    method can_fit (line 153) | def can_fit(self, text: str, token_count: int) -> bool:
    method add (line 161) | def add(self, text: str, token_count: int) -> bool:
    method force_append (line 168) | def force_append(self, text: str):
    method flush_into (line 171) | def flush_into(self, out: list[Chunk]):
    method has_content (line 180) | def has_content(self) -> bool:
    method append_figure_and_flush (line 183) | def append_figure_and_flush(self, figure_text: str, out: list[Chunk]):
  class SentenceTextSplitter (line 189) | class SentenceTextSplitter(TextSplitter):
    method __init__ (line 194) | def __init__(self, max_tokens_per_section: int = 500):
    method _find_split_pos (line 206) | def _find_split_pos(self, text: str) -> tuple[int, bool]:
    method split_page_by_max_tokens (line 247) | def split_page_by_max_tokens(self, page_num: int, text: str) -> Genera...
    method _is_heading_like (line 273) | def _is_heading_like(self, line: str) -> bool:
    method _should_cross_page_overlap (line 292) | def _should_cross_page_overlap(self, prev: Chunk, nxt: Chunk) -> bool:
    method _append_overlap (line 311) | def _append_overlap(self, prev_chunk: Chunk, next_chunk: Chunk) -> Chunk:
    method split_pages (line 380) | def split_pages(self, pages: list[Page]) -> Generator[Chunk, None, None]:
  class SimpleTextSplitter (line 586) | class SimpleTextSplitter(TextSplitter):
    method __init__ (line 592) | def __init__(self, max_object_length: int = 1000):
    method split_pages (line 595) | def split_pages(self, pages: list[Page]) -> Generator[Chunk, None, None]:

FILE: app/backend/setup_cloud_ingestion.py
  function setup_cloud_ingestion_strategy (line 29) | async def setup_cloud_ingestion_strategy(
  function main (line 148) | async def main():

FILE: app/frontend/src/api/api.ts
  constant BACKEND_URI (line 1) | const BACKEND_URI = "";
  function getHeaders (line 6) | async function getHeaders(idToken: string | undefined): Promise<Record<s...
  function configApi (line 17) | async function configApi(): Promise<Config> {
  function chatApi (line 25) | async function chatApi(request: ChatAppRequest, shouldStream: boolean, i...
  function getSpeechApi (line 39) | async function getSpeechApi(text: string): Promise<string | null> {
  function getCitationFilePath (line 63) | function getCitationFilePath(citation: string): string {
  function uploadFileApi (line 69) | async function uploadFileApi(request: FormData, idToken: string): Promis...
  function deleteUploadedFileApi (line 84) | async function deleteUploadedFileApi(filename: string, idToken: string):...
  function listUploadedFilesApi (line 100) | async function listUploadedFilesApi(idToken: string): Promise<string[]> {
  function postChatHistoryApi (line 114) | async function postChatHistoryApi(item: any, idToken: string): Promise<a...
  function getChatHistoryListApi (line 130) | async function getChatHistoryListApi(count: number, continuationToken: s...
  function getChatHistoryApi (line 150) | async function getChatHistoryApi(id: string, idToken: string): Promise<H...
  function deleteChatHistoryApi (line 165) | async function deleteChatHistoryApi(id: string, idToken: string): Promis...

FILE: app/frontend/src/api/models.ts
  type RetrievalMode (line 1) | const enum RetrievalMode {
  type ChatAppRequestOverrides (line 7) | type ChatAppRequestOverrides = {
  type ResponseMessage (line 35) | type ResponseMessage = {
  type Thoughts (line 40) | type Thoughts = {
  type ActivityDetail (line 46) | type ActivityDetail = {
  type ExternalResultMetadata (line 55) | type ExternalResultMetadata = {
  type CitationActivityDetail (line 63) | type CitationActivityDetail = {
  type DataPoints (line 71) | type DataPoints = {
  type ResponseContext (line 79) | type ResponseContext = {
  type ChatAppResponseOrError (line 86) | type ChatAppResponseOrError = {
  type ChatAppResponse (line 94) | type ChatAppResponse = {
  type ChatAppRequestContext (line 101) | type ChatAppRequestContext = {
  type ChatAppRequest (line 105) | type ChatAppRequest = {
  type Config (line 111) | type Config = {
  type SimpleAPIResponse (line 136) | type SimpleAPIResponse = {
  type SpeechConfig (line 140) | interface SpeechConfig {
  type HistoryListApiResponse (line 148) | type HistoryListApiResponse = {
  type HistoryApiResponse (line 158) | type HistoryApiResponse = {

FILE: app/frontend/src/authConfig.ts
  type AppServicesToken (line 9) | interface AppServicesToken {
  type AuthSetup (line 16) | interface AuthSetup {
  function fetchAuthSetup (line 56) | async function fetchAuthSetup(): Promise<AuthSetup> {

FILE: app/frontend/src/components/AnalysisPanel/AgentPlan.tsx
  type Props (line 102) | interface Props {

FILE: app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx
  type Props (line 14) | interface Props {

FILE: app/frontend/src/components/AnalysisPanel/AnalysisPanelTabs.tsx
  type AnalysisPanelTabs (line 1) | enum AnalysisPanelTabs {

FILE: app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx
  type Props (line 14) | interface Props {
  function truncateImageUrl (line 20) | function truncateImageUrl(val: string) {

FILE: app/frontend/src/components/AnalysisPanel/TokenUsageGraph.tsx
  type TokenUsage (line 4) | interface TokenUsage {
  type TokenLabelKey (line 11) | type TokenLabelKey = "prompt" | "reasoning" | "output" | "total";
  type AdditionalTotal (line 13) | type AdditionalTotal = {
  type SupplementaryUsage (line 19) | type SupplementaryUsage = {
  type PercentBase (line 26) | type PercentBase = number | undefined;
  type TokenUsageSegmentLabels (line 37) | interface TokenUsageSegmentLabels {
  type TokenUsageStackedBarProps (line 43) | interface TokenUsageStackedBarProps {
  type TokenUsageValueBarTone (line 87) | type TokenUsageValueBarTone = "primary" | "secondary";
  type TokenUsageValueBarGrouping (line 88) | type TokenUsageValueBarGrouping = "grouped" | "standalone";
  type TokenUsageValueBarProps (line 90) | interface TokenUsageValueBarProps {
  type TokenUsageGraphProps (line 117) | interface TokenUsageGraphProps {

FILE: app/frontend/src/components/AnalysisPanel/agentPlanUtils.ts
  type QueryPlanStep (line 1) | type QueryPlanStep = {
  function getStepLabel (line 39) | function getStepLabel(step: QueryPlanStep): string {

FILE: app/frontend/src/components/Answer/Answer.tsx
  type Props (line 17) | interface Props {

FILE: app/frontend/src/components/Answer/AnswerError.tsx
  type Props (line 6) | interface Props {

FILE: app/frontend/src/components/Answer/AnswerParser.tsx
  type CitationDetail (line 5) | type CitationDetail = {
  type CitationFragment (line 15) | type CitationFragment =
  type ActivityStepMeta (line 22) | type ActivityStepMeta = {
  type HtmlParsedAnswer (line 27) | type HtmlParsedAnswer = {
  function parseAnswerToHtml (line 206) | function parseAnswerToHtml(answer: ChatAppResponse, isStreaming: boolean...
  function extractCitationDetails (line 216) | function extractCitationDetails(answer: ChatAppResponse, isStreaming = f...

FILE: app/frontend/src/components/Answer/SpeechOutputAzure.tsx
  type Props (line 7) | interface Props {

FILE: app/frontend/src/components/Answer/SpeechOutputBrowser.tsx
  type Props (line 7) | interface Props {

FILE: app/frontend/src/components/ClearChatButton/ClearChatButton.tsx
  type Props (line 7) | interface Props {

FILE: app/frontend/src/components/Example/Example.tsx
  type Props (line 3) | interface Props {

FILE: app/frontend/src/components/Example/ExampleList.tsx
  type Props (line 6) | interface Props {

FILE: app/frontend/src/components/HelpCallout/HelpCallout.tsx
  type IHelpCalloutProps (line 6) | interface IHelpCalloutProps {

FILE: app/frontend/src/components/HistoryButton/HistoryButton.tsx
  type Props (line 7) | interface Props {

FILE: app/frontend/src/components/HistoryItem/HistoryItem.tsx
  type HistoryData (line 7) | interface HistoryData {
  type HistoryItemProps (line 13) | interface HistoryItemProps {
  function HistoryItem (line 19) | function HistoryItem({ item, onSelect, onDelete }: HistoryItemProps) {
  function DeleteHistoryModal (line 40) | function DeleteHistoryModal({ isOpen, onClose, onConfirm }: { isOpen: bo...

FILE: app/frontend/src/components/HistoryPanel/HistoryPanel.tsx
  constant HISTORY_COUNT_PER_LOAD (line 12) | const HISTORY_COUNT_PER_LOAD = 20;
  function groupHistory (line 115) | function groupHistory(history: HistoryData[]) {

FILE: app/frontend/src/components/HistoryProviders/CosmosDB.ts
  class CosmosDBProvider (line 4) | class CosmosDBProvider implements IHistoryProvider {
    method resetContinuationToken (line 10) | resetContinuationToken() {
    method getNextItems (line 15) | async getNextItems(count: number, idToken?: string): Promise<HistoryMe...
    method addItem (line 37) | async addItem(id: string, answers: Answers, idToken?: string): Promise...
    method getItem (line 42) | async getItem(id: string, idToken?: string): Promise<Answers | null> {
    method deleteItem (line 47) | async deleteItem(id: string, idToken?: string): Promise<void> {

FILE: app/frontend/src/components/HistoryProviders/IProvider.ts
  type HistoryMetaData (line 3) | type HistoryMetaData = { id: string; title: string; timestamp: number };
  type Answers (line 4) | type Answers = [user: string, response: ChatAppResponse][];
  type HistoryProviderOptions (line 6) | const enum HistoryProviderOptions {
  type IHistoryProvider (line 12) | interface IHistoryProvider {

FILE: app/frontend/src/components/HistoryProviders/IndexedDB.ts
  class IndexedDBProvider (line 4) | class IndexedDBProvider implements IHistoryProvider {
    method constructor (line 13) | constructor(dbName: string, storeName: string) {
    method init (line 20) | private async init() {
    method resetContinuationToken (line 35) | resetContinuationToken() {
    method getNextItems (line 40) | async getNextItems(count: number): Promise<HistoryMetaData[]> {
    method addItem (line 77) | async addItem(id: string, answers: Answers): Promise<void> {
    method getItem (line 92) | async getItem(id: string): Promise<Answers | null> {
    method deleteItem (line 99) | async deleteItem(id: string): Promise<void> {

FILE: app/frontend/src/components/HistoryProviders/None.ts
  class NoneProvider (line 3) | class NoneProvider implements IHistoryProvider {
    method resetContinuationToken (line 5) | resetContinuationToken(): void {
    method getNextItems (line 8) | async getNextItems(count: number): Promise<HistoryMetaData[]> {
    method addItem (line 11) | async addItem(id: string, answers: Answers): Promise<void> {
    method getItem (line 14) | async getItem(id: string): Promise<null> {
    method deleteItem (line 17) | async deleteItem(id: string): Promise<void> {

FILE: app/frontend/src/components/MarkdownViewer/MarkdownViewer.tsx
  type MarkdownViewerProps (line 10) | interface MarkdownViewerProps {

FILE: app/frontend/src/components/QuestionInput/QuestionInput.tsx
  type Props (line 18) | interface Props {

FILE: app/frontend/src/components/QuestionInput/SpeechInput.tsx
  type Props (line 8) | interface Props {

FILE: app/frontend/src/components/Settings/Settings.tsx
  type SettingsProps (line 10) | interface SettingsProps {

FILE: app/frontend/src/components/SettingsButton/SettingsButton.tsx
  type Props (line 7) | interface Props {

FILE: app/frontend/src/components/SupportingContent/SupportingContent.tsx
  type Props (line 8) | interface Props {

FILE: app/frontend/src/components/SupportingContent/SupportingContentParser.ts
  type ParsedSupportingContentItem (line 3) | type ParsedSupportingContentItem = {
  function parseSupportingContentItem (line 8) | function parseSupportingContentItem(item: string): ParsedSupportingConte...

FILE: app/frontend/src/components/TokenClaimsDisplay/TokenClaimsDisplay.tsx
  type Claim (line 16) | type Claim = {

FILE: app/frontend/src/components/UploadFile/UploadFile.tsx
  type Props (line 11) | interface Props {

FILE: app/frontend/src/components/UserChatMessage/UserChatMessage.tsx
  type Props (line 3) | interface Props {

FILE: app/frontend/src/components/VectorSettings/VectorSettings.tsx
  type Props (line 10) | interface Props {

FILE: app/frontend/src/i18n/LanguagePicker.tsx
  type Props (line 9) | interface Props {

FILE: app/frontend/src/pages/NoPage.tsx
  function Component (line 3) | function Component(): JSX.Element {

FILE: app/functions/document_extractor/function_app.py
  class GlobalSettings (line 34) | class GlobalSettings:
  function configure_global_settings (line 48) | def configure_global_settings():
  function extract_document (line 112) | async def extract_document(req: func.HttpRequest) -> func.HttpResponse:
  function process_document (line 205) | async def process_document(data: dict[str, Any]) -> dict[str, Any]:
  function get_file_acls (line 262) | async def get_file_acls(file_path: str) -> tuple[list[str], list[str]]:
  function build_document_components (line 338) | def build_document_components(

FILE: app/functions/figure_processor/function_app.py
  class GlobalSettings (line 40) | class GlobalSettings:
  function configure_global_settings (line 49) | def configure_global_settings():
  function process_figure_request (line 129) | async def process_figure_request(req: func.HttpRequest) -> func.HttpResp...

FILE: app/functions/text_processor/function_app.py
  class GlobalSettings (line 36) | class GlobalSettings:
  function configure_global_settings (line 48) | def configure_global_settings():
  function process_text_entry (line 119) | async def process_text_entry(req: func.HttpRequest) -> func.HttpResponse:
  function process_document (line 173) | async def process_document(data: dict[str, Any]) -> list[dict[str, Any]]:

FILE: evals/evaluate.py
  class AnyCitationMetric (line 39) | class AnyCitationMetric(BaseMetric):
    method evaluator_fn (line 43) | def evaluator_fn(cls, **kwargs):
    method get_aggregate_stats (line 53) | def get_aggregate_stats(cls, df):
  class CitationsMatchedMetric (line 61) | class CitationsMatchedMetric(BaseMetric):
    method evaluator_fn (line 65) | def evaluator_fn(cls, **kwargs):
    method get_aggregate_stats (line 81) | def get_aggregate_stats(cls, df):
  function get_openai_config (line 89) | def get_openai_config():
  function get_azure_credential (line 98) | def get_azure_credential():

FILE: evals/generate_ground_truth.py
  function get_azure_credential (line 25) | def get_azure_credential():
  function get_search_documents (line 36) | def get_search_documents(azure_credential, num_search_documents=None) ->...
  function generate_ground_truth_ragas (line 55) | def generate_ground_truth_ragas(num_questions=200, num_search_documents=...

FILE: evals/safety_evaluation.py
  class HarmSeverityLevel (line 26) | class HarmSeverityLevel(Enum):
  function get_azure_credential (line 38) | def get_azure_credential():
  function callback (line 49) | async def callback(
  function run_simulator (line 91) | async def run_simulator(target_url: str, max_simulations: int):

FILE: locustfile.py
  class ChatUser (line 7) | class ChatUser(HttpUser):
    method ask_question (line 11) | def ask_question(self):

FILE: scripts/adlsgen2setup.py
  class AdlsGen2Setup (line 21) | class AdlsGen2Setup:
    method __init__ (line 26) | def __init__(
    method run (line 59) | async def run(self):
    method create_service_client (line 113) | def create_service_client(self):
    method upload_file (line 118) | async def upload_file(self, directory_client: DataLakeDirectoryClient,...
    method create_or_get_group (line 123) | async def create_or_get_group(self, group_name: str):
  function main (line 157) | async def main(args: Any):

FILE: scripts/auth_common.py
  function get_application (line 8) | async def get_application(graph_client: GraphServiceClient, client_id: s...
  function test_authentication_enabled (line 18) | def test_authentication_enabled():

FILE: scripts/auth_init.py
  function create_application (line 35) | async def create_application(graph_client: GraphServiceClient, request_a...
  function add_client_secret (line 50) | async def add_client_secret(graph_client: GraphServiceClient, app_id: st...
  function create_or_update_application_with_secret (line 62) | async def create_or_update_application_with_secret(
  function update_azd_env (line 89) | def update_azd_env(name, val):
  function random_app_identifier (line 93) | def random_app_identifier():
  function server_app_initial (line 99) | def server_app_initial(identifier: int) -> Application:
  function server_app_permission_setup (line 106) | def server_app_permission_setup(server_app_id: str) -> Application:
  function client_app (line 154) | def client_app(server_app_id: str, server_app: Application, identifier: ...
  function server_app_known_client_application (line 188) | def server_app_known_client_application(client_app_id: str) -> Application:
  class GrantDefinition (line 197) | class GrantDefinition:
    method scope_string (line 203) | def scope_string(self) -> str:
  function grant_application_admin_consent (line 209) | async def grant_application_admin_consent(graph_client: GraphServiceClie...
  function main (line 279) | async def main():  # pragma: no cover

FILE: scripts/auth_update.py
  function main (line 15) | async def main():

FILE: scripts/copy_prepdocslib.py
  function copy_tree (line 15) | def copy_tree(src: Path, dest: Path) -> None:
  function main (line 21) | def main() -> None:

FILE: scripts/cosmosdb_migration.py
  class CosmosDBMigrator (line 38) | class CosmosDBMigrator:
    method __init__ (line 43) | def __init__(self, cosmos_account, database_name, credential=None):
    method connect (line 60) | async def connect(self):
    method migrate (line 79) | async def migrate(self):
    method close (line 131) | async def close(self):
  function migrate_cosmosdb_data (line 139) | async def migrate_cosmosdb_data():

FILE: scripts/load_azd_env.py
  function load_azd_env (line 10) | def load_azd_env():

FILE: scripts/manageacl.py
  class ManageAcl (line 27) | class ManageAcl:
    method __init__ (line 32) | def __init__(
    method run (line 70) | async def run(self):
    method view_acl (line 94) | async def view_acl(self, search_client: SearchClient):
    method remove_acl (line 100) | async def remove_acl(self, search_client: SearchClient):
    method remove_all_acls (line 116) | async def remove_all_acls(self, search_client: SearchClient):
    method add_acl (line 130) | async def add_acl(self, search_client: SearchClient):
    method get_documents (line 146) | async def get_documents(self, search_client: SearchClient):
    method enable_acls (line 160) | async def enable_acls(self, endpoint: str):
    method update_storage_urls (line 198) | async def update_storage_urls(self, search_client: SearchClient):
    method enable_global_access (line 228) | async def enable_global_access(self, search_client: SearchClient):
  function main (line 260) | async def main(args: Any):

FILE: scripts/verify_search_index_acls.py
  function main (line 16) | async def main():

FILE: tests/conftest.py
  function mock_search (line 78) | async def mock_search(self, *args, **kwargs):
  function mock_azurehttp_calls (line 85) | def mock_azurehttp_calls(monkeypatch):
  function mock_speech_success (line 98) | def mock_speech_success(monkeypatch):
  function mock_speech_cancelled (line 103) | def mock_speech_cancelled(monkeypatch):
  function mock_speech_failed (line 108) | def mock_speech_failed(monkeypatch):
  function mock_openai_embedding (line 113) | def mock_openai_embedding(monkeypatch):
  function mock_openai_chatcompletion (line 139) | def mock_openai_chatcompletion(monkeypatch):
  function mock_acs_search (line 291) | def mock_acs_search(monkeypatch):
  function mock_search_knowledgebase (line 301) | def mock_search_knowledgebase(monkeypatch):
  function mock_acs_search_filter (line 311) | def mock_acs_search_filter(monkeypatch):
  function mock_blob_container_client (line 321) | def mock_blob_container_client(monkeypatch):
  function mock_blob_container_client_exists (line 326) | def mock_blob_container_client_exists(monkeypatch):
  function mock_blob_container_client_does_not_exist (line 334) | def mock_blob_container_client_does_not_exist(monkeypatch):
  function mock_env (line 503) | def mock_env(monkeypatch, request):
  function mock_reasoning_env (line 530) | def mock_reasoning_env(monkeypatch, request):
  function mock_knowledgebase_env (line 557) | def mock_knowledgebase_env(monkeypatch, request):
  function mock_knowledgebase_auth_env (line 581) | def mock_knowledgebase_auth_env(monkeypatch, request):
  function mock_vision_env (line 605) | def mock_vision_env(monkeypatch, request):
  function mock_vision_auth_env (line 624) | def mock_vision_auth_env(monkeypatch, request):
  function client (line 643) | async def client(
  function reasoning_client (line 662) | async def reasoning_client(
  function knowledgebase_client (line 681) | async def knowledgebase_client(
  function knowledgebase_auth_client (line 701) | async def knowledgebase_auth_client(
  function client_with_expiring_token (line 726) | async def client_with_expiring_token(
  function auth_client (line 746) | async def auth_client(
  function auth_public_documents_client (line 785) | async def auth_public_documents_client(
  function vision_client (line 829) | async def vision_client(
  function vision_auth_client (line 856) | async def vision_auth_client(
  function mock_validate_token_success (line 885) | def mock_validate_token_success(monkeypatch):
  function mock_confidential_client_success (line 893) | def mock_confidential_client_success(monkeypatch):
  function mock_confidential_client_unauthorized (line 911) | def mock_confidential_client_unauthorized(monkeypatch):
  function mock_data_lake_service_client (line 929) | def mock_data_lake_service_client(monkeypatch):
  function mock_user_directory_client (line 1123) | def mock_user_directory_client(monkeypatch):
  function chat_approach (line 1132) | def chat_approach():

FILE: tests/e2e.py
  function wait_for_server_ready (line 21) | def wait_for_server_ready(url: str, timeout: float = 10.0, check_interva...
  function free_port (line 36) | def free_port() -> int:
  function run_server (line 44) | def run_server(port: int):
  function live_server_url (line 70) | def live_server_url(mock_env, mock_acs_search, free_port: int) -> Genera...
  function sized_page (line 80) | def sized_page(page: Page, request):
  function test_home (line 86) | def test_home(page: Page, live_server_url: str):
  function test_chat (line 91) | def test_chat(sized_page: Page, live_server_url: str):
  function test_chat_stop_button_visibility (line 166) | def test_chat_stop_button_visibility(page: Page, live_server_url: str):
  function test_chat_stop_restores_question (line 205) | def test_chat_stop_restores_question(page: Page, live_server_url: str):
  function test_chat_customization (line 246) | def test_chat_customization(page: Page, live_server_url: str):
  function test_chat_customization_multimodal (line 317) | def test_chat_customization_multimodal(page: Page, live_server_url: str):
  function test_chat_nonstreaming (line 414) | def test_chat_nonstreaming(page: Page, live_server_url: str):
  function test_chat_followup_streaming (line 445) | def test_chat_followup_streaming(page: Page, live_server_url: str):
  function test_chat_followup_nonstreaming (line 490) | def test_chat_followup_nonstreaming(page: Page, live_server_url: str):
  function test_upload_hidden (line 528) | def test_upload_hidden(page: Page, live_server_url: str):
  function test_upload_disabled (line 578) | def test_upload_disabled(page: Page, live_server_url: str):
  function test_agentic_retrieval_effort_minimal_disables_web (line 629) | def test_agentic_retrieval_effort_minimal_disables_web(page: Page, live_...

FILE: tests/mocks.py
  class MockAzureCredential (line 47) | class MockAzureCredential(AsyncTokenCredential):
    method get_token (line 49) | async def get_token(self, *scopes, **kwargs):  # accept claims, enable...
  class MockAzureCredentialExpired (line 54) | class MockAzureCredentialExpired(AsyncTokenCredential):
    method __init__ (line 56) | def __init__(self):
    method get_token (line 59) | async def get_token(self, *scopes, **kwargs):
  class MockBlobClient (line 67) | class MockBlobClient:
    method download_blob (line 68) | async def download_blob(self):
  class MockBlob (line 72) | class MockBlob:
    method __init__ (line 73) | def __init__(self):
    method readall (line 78) | async def readall(self):
    method readinto (line 81) | async def readinto(self, buffer: BytesIO):
  class MockAiohttpClientResponse404 (line 85) | class MockAiohttpClientResponse404(aiohttp.ClientResponse):
    method __init__ (line 86) | def __init__(self, url, body_bytes, headers=None):
  class MockAiohttpClientResponse (line 95) | class MockAiohttpClientResponse(aiohttp.ClientResponse):
    method __init__ (line 96) | def __init__(self, url, body_bytes, headers=None):
  class MockTransport (line 105) | class MockTransport(AsyncHttpTransport):
    method send (line 106) | async def send(self, request: HttpRequest, **kwargs) -> AioHttpTranspo...
    method __aexit__ (line 120) | async def __aexit__(self, *args):
    method open (line 123) | async def open(self):
    method close (line 126) | async def close(self):
  class MockAsyncPageIterator (line 130) | class MockAsyncPageIterator:
    method __init__ (line 131) | def __init__(self, data):
    method __aiter__ (line 134) | def __aiter__(self):
    method __anext__ (line 137) | async def __anext__(self):
  class MockCaption (line 143) | class MockCaption:
    method __init__ (line 144) | def __init__(self, text, highlights=None, additional_properties=None):
  class MockAsyncSearchResultsIterator (line 150) | class MockAsyncSearchResultsIterator:
    method __init__ (line 151) | def __init__(self, search_text, vector_queries: Optional[list[VectorQu...
    method __aiter__ (line 302) | def __aiter__(self):
    method __anext__ (line 305) | async def __anext__(self):
    method get_count (line 310) | async def get_count(self):
    method by_page (line 313) | def by_page(self):
  class MockResponse (line 317) | class MockResponse:
    method __init__ (line 318) | def __init__(self, status, text=None, headers=None):
    method __aexit__ (line 323) | async def __aexit__(self, exc_type, exc, tb):
    method __aenter__ (line 326) | async def __aenter__(self):
    method text (line 329) | async def text(self):
    method json (line 332) | async def json(self):
    method raise_for_status (line 335) | def raise_for_status(self):
  class MockEmbeddingsClient (line 340) | class MockEmbeddingsClient:
    method __init__ (line 341) | def __init__(self, create_embedding_response: openai.types.CreateEmbed...
    method create (line 344) | async def create(self, *args, **kwargs) -> openai.types.CreateEmbeddin...
  class MockClient (line 348) | class MockClient:
    method __init__ (line 349) | def __init__(self, embeddings_client):
  function mock_vision_response (line 353) | def mock_vision_response():
  function create_mock_retrieve (line 375) | def create_mock_retrieve(response_type="default"):
  function mock_retrieval_response (line 423) | def mock_retrieval_response():
  function mock_retrieval_response_with_web (line 452) | def mock_retrieval_response_with_web():
  function mock_retrieval_response_with_sharepoint (line 499) | def mock_retrieval_response_with_sharepoint():
  function mock_retrieval_response_with_sorting (line 546) | def mock_retrieval_response_with_sorting():
  function mock_retrieval_response_with_top_limit (line 584) | def mock_retrieval_response_with_top_limit():
  class MockAudio (line 611) | class MockAudio:
    method __init__ (line 612) | def __init__(self, audio_data):
    method read (line 616) | def read(self):
  class MockSpeechSynthesisCancellationDetails (line 620) | class MockSpeechSynthesisCancellationDetails:
    method __init__ (line 621) | def __init__(self):
  class MockAudioCancelled (line 626) | class MockAudioCancelled:
    method __init__ (line 627) | def __init__(self, audio_data):
    method read (line 632) | def read(self):
  class MockAudioFailure (line 636) | class MockAudioFailure:
    method __init__ (line 637) | def __init__(self, audio_data):
    method read (line 641) | def read(self):
  class MockSynthesisResult (line 645) | class MockSynthesisResult:
    method __init__ (line 646) | def __init__(self, result):
    method get (line 649) | def get(self):
  class MockDirectoryClient (line 654) | class MockDirectoryClient:
    method get_directory_properties (line 655) | async def get_directory_properties(self):
    method get_access_control (line 659) | async def get_access_control(self):
    method get_file_client (line 663) | def get_file_client(self, filename):
  class MockFileClient (line 669) | class MockFileClient:
    method __init__ (line 670) | def __init__(self, path_name):
    method download_file (line 673) | async def download_file(self):
  function mock_speak_text_success (line 677) | def mock_speak_text_success(self, text):
  function mock_speak_text_cancelled (line 681) | def mock_speak_text_cancelled(self, text):
  function mock_speak_text_failed (line 685) | def mock_speak_text_failed(self, text):

FILE: tests/test_adlsgen2setup.py
  function mock_open (line 27) | def mock_open(monkeypatch):
  function mock_adlsgen2setup (line 42) | def mock_adlsgen2setup(monkeypatch):
  function mock_get_group_success (line 54) | def mock_get_group_success(monkeypatch):
  function mock_get_group_missing (line 74) | def mock_get_group_missing(monkeypatch):
  function mock_put_group (line 85) | def mock_put_group(monkeypatch):
  function test_adls_gen2_setup (line 101) | async def test_adls_gen2_setup(
  function test_adls_gen2_create_group (line 140) | async def test_adls_gen2_create_group(

FILE: tests/test_agentic_retrieval.py
  function test_agentic_retrieval_default_sort (line 18) | async def test_agentic_retrieval_default_sort(chat_approach, monkeypatch):
  function test_agentic_retrieval_no_references (line 44) | async def test_agentic_retrieval_no_references(chat_approach, monkeypatch):
  function test_agentic_retrieval_web_results (line 69) | async def test_agentic_retrieval_web_results(chat_approach, monkeypatch):
  function test_agentic_retrieval_sharepoint_results (line 100) | async def test_agentic_retrieval_sharepoint_results(chat_approach, monke...
  function test_agentic_retrieval_minimal_uses_query_rewrite (line 126) | async def test_agentic_retrieval_minimal_uses_query_rewrite(chat_approac...
  function test_agentic_retrieval_minimal_requires_string (line 174) | async def test_agentic_retrieval_minimal_requires_string(chat_approach):

FILE: tests/test_app.py
  function fake_response (line 15) | def fake_response(http_code):
  function messages_contains_text (line 45) | def messages_contains_text(messages, text):
  function pop_citation_activity_details (line 52) | def pop_citation_activity_details(result: dict[str, Any] | None):  # typ...
  function test_missing_env_vars (line 65) | async def test_missing_env_vars():
  function test_index (line 75) | async def test_index(client):
  function test_redirect (line 81) | async def test_redirect(client):
  function test_favicon (line 88) | async def test_favicon(client):
  function test_cors_notallowed (line 96) | async def test_cors_notallowed(client) -> None:
  function test_assets_route_delegates_to_send_from_directory (line 102) | async def test_assets_route_delegates_to_send_from_directory(client, mon...
  function test_cors_allowed (line 116) | async def test_cors_allowed(client) -> None:
  function test_chat_request_must_be_json (line 123) | async def test_chat_request_must_be_json(client):
  function test_send_text_sources_false (line 131) | async def test_send_text_sources_false(client):
  function test_search_image_embeddings_ignored_without_multimodal (line 148) | async def test_search_image_embeddings_ignored_without_multimodal(client):
  function test_content_file_missing_content_settings (line 167) | async def test_content_file_missing_content_settings(auth_client, monkey...
  function test_chat_stream_request_must_be_json (line 180) | async def test_chat_stream_request_must_be_json(client):
  function test_json_encoder_drops_optional_fields (line 187) | def test_json_encoder_drops_optional_fields():
  function test_auth_setup_returns_payload (line 197) | async def test_auth_setup_returns_payload(client):
  function test_chat_handle_exception (line 206) | async def test_chat_handle_exception(client, monkeypatch, snapshot, capl...
  function test_chat_stream_handle_exception (line 223) | async def test_chat_stream_handle_exception(client, monkeypatch, snapsho...
  function test_chat_handle_exception_contentsafety (line 240) | async def test_chat_handle_exception_contentsafety(client, monkeypatch, ...
  function test_chat_handle_exception_streaming (line 257) | async def test_chat_handle_exception_streaming(client, monkeypatch, snap...
  function test_chat_handle_exception_contentsafety_streaming (line 274) | async def test_chat_handle_exception_contentsafety_streaming(client, mon...
  function test_speech (line 289) | async def test_speech(client, mock_speech_success):
  function test_speech_token_refresh (line 301) | async def test_speech_token_refresh(client_with_expiring_token, mock_spe...
  function test_speech_request_must_be_json (line 332) | async def test_speech_request_must_be_json(client, mock_speech_success):
  function test_speech_request_cancelled (line 340) | async def test_speech_request_cancelled(client, mock_speech_cancelled):
  function test_speech_request_failed (line 353) | async def test_speech_request_failed(client, mock_speech_failed):
  function test_chat_text (line 366) | async def test_chat_text(client, snapshot):
  function test_chat_text_agent (line 385) | async def test_chat_text_agent(knowledgebase_client, snapshot):
  function test_chat_text_filter (line 402) | async def test_chat_text_filter(auth_client, snapshot):
  function test_chat_text_filter_agent (line 424) | async def test_chat_text_filter_agent(knowledgebase_auth_client, snapshot):
  function test_chat_text_filter_public_documents (line 446) | async def test_chat_text_filter_public_documents(auth_public_documents_c...
  function test_chat_text_semanticranker (line 470) | async def test_chat_text_semanticranker(client, snapshot):
  function test_chat_text_semanticcaptions (line 486) | async def test_chat_text_semanticcaptions(client, snapshot):
  function test_chat_prompt_template (line 502) | async def test_chat_prompt_template(client, snapshot):
  function test_chat_seed (line 519) | async def test_chat_seed(client, snapshot):
  function test_chat_hybrid (line 535) | async def test_chat_hybrid(client, snapshot):
  function test_chat_hybrid_semantic_ranker (line 555) | async def test_chat_hybrid_semantic_ranker(client, snapshot):
  function test_chat_hybrid_semantic_captions (line 578) | async def test_chat_hybrid_semantic_captions(client, snapshot):
  function test_chat_vector (line 602) | async def test_chat_vector(client, snapshot):
  function test_chat_vector_semantic_ranker (line 621) | async def test_chat_vector_semantic_ranker(client, snapshot):
  function test_chat_text_semantic_ranker (line 643) | async def test_chat_text_semantic_ranker(client, snapshot):
  function test_chat_stream_text (line 662) | async def test_chat_stream_text(client, snapshot):
  function test_chat_text_reasoning (line 678) | async def test_chat_text_reasoning(reasoning_client, snapshot):
  function test_chat_stream_text_reasoning (line 700) | async def test_chat_stream_text_reasoning(reasoning_client, snapshot):
  function test_chat_stream_text_filter (line 716) | async def test_chat_stream_text_filter(auth_client, snapshot):
  function test_chat_with_history (line 738) | async def test_chat_with_history(client, snapshot):
  function test_chat_session_state_persists (line 762) | async def test_chat_session_state_persists(client, snapshot):
  function test_chat_stream_session_state_persists (line 779) | async def test_chat_stream_session_state_persists(client, snapshot):
  function test_chat_followup (line 796) | async def test_chat_followup(client, snapshot):
  function test_chat_stream_followup (line 816) | async def test_chat_stream_followup(client, snapshot):
  function test_chat_vision (line 834) | async def test_chat_vision(monkeypatch, vision_client, snapshot):
  function test_chat_stream_vision (line 845) | async def test_chat_stream_vision(vision_client, snapshot):
  function test_chat_vision_user (line 856) | async def test_chat_vision_user(monkeypatch, vision_auth_client, mock_us...
  function test_format_as_ndjson (line 869) | async def test_format_as_ndjson():

FILE: tests/test_app_config.py
  function minimal_env (line 11) | def minimal_env(monkeypatch):
  function test_app_local_openai (line 27) | async def test_app_local_openai(monkeypatch, minimal_env):
  function test_app_azure_custom_key (line 38) | async def test_app_azure_custom_key(monkeypatch, minimal_env):
  function test_app_azure_custom_identity (line 50) | async def test_app_azure_custom_identity(monkeypatch, minimal_env):
  function test_app_user_upload_processors (line 64) | async def test_app_user_upload_processors(monkeypatch, minimal_env):
  function test_app_user_upload_requires_storage_configuration (line 78) | async def test_app_user_upload_requires_storage_configuration(monkeypatc...
  function test_app_user_upload_requires_enforce_access_control (line 91) | async def test_app_user_upload_requires_enforce_access_control(monkeypat...
  function test_app_user_upload_processors_docint (line 106) | async def test_app_user_upload_processors_docint(monkeypatch, minimal_env):
  function test_app_user_upload_processors_docint_localpdf (line 121) | async def test_app_user_upload_processors_docint_localpdf(monkeypatch, m...
  function test_app_user_upload_processors_docint_localhtml (line 138) | async def test_app_user_upload_processors_docint_localhtml(monkeypatch, ...
  function test_app_config_default (line 155) | async def test_app_config_default(monkeypatch, minimal_env):
  function test_app_config_use_vectors_true (line 169) | async def test_app_config_use_vectors_true(monkeypatch, minimal_env):
  function test_app_config_use_vectors_false (line 183) | async def test_app_config_use_vectors_false(monkeypatch, minimal_env):
  function test_app_config_semanticranker_free (line 197) | async def test_app_config_semanticranker_free(monkeypatch, minimal_env):
  function test_app_config_semanticranker_disabled (line 212) | async def test_app_config_semanticranker_disabled(monkeypatch, minimal_e...
  function test_app_config_user_upload (line 227) | async def test_app_config_user_upload(monkeypatch, minimal_env):
  function test_app_config_user_upload_novectors (line 245) | async def test_app_config_user_upload_novectors(monkeypatch, minimal_env):
  function test_app_config_user_upload_bad_openai_config (line 265) | async def test_app_config_user_upload_bad_openai_config(monkeypatch, min...
  function test_app_config_user_upload_openaicom (line 281) | async def test_app_config_user_upload_openaicom(monkeypatch, minimal_env):
  function test_app_config_for_client (line 299) | async def test_app_config_for_client(client):
  function test_app_config_for_reasoning (line 311) | async def test_app_config_for_reasoning(monkeypatch, minimal_env):
  function test_app_config_for_reasoning_without_streaming (line 325) | async def test_app_config_for_reasoning_without_streaming(monkeypatch, m...
  function test_app_config_for_reasoning_override_effort (line 339) | async def test_app_config_for_reasoning_override_effort(monkeypatch, min...
  function test_app_enables_azure_monitor_when_connection_string_set (line 354) | def test_app_enables_azure_monitor_when_connection_string_set(monkeypatch):

FILE: tests/test_auth_init.py
  function graph_client (line 35) | def graph_client(monkeypatch):
  class FakeRequestBuilder (line 105) | class FakeRequestBuilder:
    method __init__ (line 106) | def __init__(self, result):
    method get (line 109) | async def get(self):
  class FakeOAuthGrant (line 113) | class FakeOAuthGrant:
    method __init__ (line 114) | def __init__(self):
    method configure (line 120) | def configure(self, responses, raise_on_post=None):
    method next_response (line 126) | def next_response(self):
    method handle_post (line 131) | def handle_post(self, grant):
  function test_create_application_success (line 142) | async def test_create_application_success(graph_client):
  function test_create_application_missing_ids (line 152) | async def test_create_application_missing_ids(graph_client, monkeypatch):
  function test_add_client_secret_success (line 170) | async def test_add_client_secret_success(graph_client):
  function test_add_client_secret_missing_secret (line 178) | async def test_add_client_secret_missing_secret(graph_client):
  function test_create_or_update_application_creates_and_adds_secret (line 186) | async def test_create_or_update_application_creates_and_adds_secret(grap...
  function test_create_or_update_application_existing_adds_secret (line 217) | async def test_create_or_update_application_existing_adds_secret(graph_c...
  function test_create_or_update_application_existing_with_secret (line 248) | async def test_create_or_update_application_existing_with_secret(graph_c...
  function test_client_app_validation_errors (line 271) | def test_client_app_validation_errors():
  function test_client_app_success (line 286) | def test_client_app_success():
  function test_server_app_permission_setup (line 295) | def test_server_app_permission_setup():
  function test_grant_application_admin_consent_creates_grants (line 304) | async def test_grant_application_admin_consent_creates_grants(graph_clie...
  function test_grant_application_admin_consent_skips_existing_grants (line 321) | async def test_grant_application_admin_consent_skips_existing_grants(gra...
  function test_grant_application_admin_consent_handles_insufficient_permissions (line 339) | async def test_grant_application_admin_consent_handles_insufficient_perm...

FILE: tests/test_authenticationhelper.py
  function create_authentication_helper (line 28) | def create_authentication_helper(
  function create_search_client (line 44) | def create_search_client():
  function create_mock_jwt (line 48) | def create_mock_jwt(kid="mock_kid", oid="OID_X"):
  function test_get_auth_claims_success (line 79) | async def test_get_auth_claims_success(mock_confidential_client_success,...
  function test_get_auth_claims_success_no_required (line 87) | async def test_get_auth_claims_success_no_required(mock_confidential_cli...
  function test_get_auth_claims_unauthorized (line 95) | async def test_get_auth_claims_unauthorized(mock_confidential_client_una...
  function test_auth_setup (line 102) | def test_auth_setup(mock_confidential_client_success, mock_validate_toke...
  function test_auth_setup_required_access_control (line 108) | def test_auth_setup_required_access_control(mock_confidential_client_suc...
  function test_auth_setup_required_access_control_and_unauthenticated_access (line 114) | def test_auth_setup_required_access_control_and_unauthenticated_access(
  function test_get_auth_token (line 122) | def test_get_auth_token(mock_confidential_client_success, mock_validate_...
  function test_check_path_auth_denied (line 140) | async def test_check_path_auth_denied(monkeypatch, mock_confidential_cli...
  function test_check_path_auth_allowed_sourcepage (line 166) | async def test_check_path_auth_allowed_sourcepage(
  function test_check_path_auth_allowed_sourcefile (line 197) | async def test_check_path_auth_allowed_sourcefile(
  function test_check_path_auth_allowed_empty (line 225) | async def test_check_path_auth_allowed_empty(
  function test_check_path_auth_allowed_fragment (line 253) | async def test_check_path_auth_allowed_fragment(
  function test_check_path_auth_allowed_without_access_control (line 281) | async def test_check_path_auth_allowed_without_access_control(
  function test_create_pem_format (line 312) | async def test_create_pem_format(mock_confidential_client_success, mock_...
  function test_validate_access_token (line 394) | async def test_validate_access_token(monkeypatch, mock_confidential_clie...

FILE: tests/test_blob_manager.py
  function blob_manager (line 20) | def blob_manager():
  function adls_blob_manager (line 32) | def adls_blob_manager(monkeypatch):
  function test_upload_and_remove (line 43) | async def test_upload_and_remove(monkeypatch, mock_env, mock_blob_contai...
  function test_upload_and_remove_all (line 92) | async def test_upload_and_remove_all(monkeypatch, mock_env, mock_blob_co...
  function test_create_container_upon_upload (line 141) | async def test_create_container_upon_upload(monkeypatch, mock_env, blob_...
  function test_dont_remove_if_no_container (line 170) | async def test_dont_remove_if_no_container(
  function test_upload_document_image (line 185) | async def test_upload_document_image(monkeypatch, mock_env, directory_ex...
  function test_adls_upload_document_image (line 240) | async def test_adls_upload_document_image(monkeypatch, mock_env, adls_bl...
  function test_get_managed_identity_connection_string (line 287) | def test_get_managed_identity_connection_string(mock_env, blob_manager):
  function test_sourcepage_from_file_page (line 294) | def test_sourcepage_from_file_page():
  function test_blob_name_from_file_name (line 299) | def test_blob_name_from_file_name():
  function test_download_blob (line 305) | async def test_download_blob(monkeypatch, mock_env, mock_blob_container_...
  function test_download_blob_not_found (line 338) | async def test_download_blob_not_found(monkeypatch, mock_env, mock_blob_...
  function test_download_blob_container_not_exist (line 353) | async def test_download_blob_container_not_exist(
  function test_download_blob_empty_path (line 362) | async def test_download_blob_empty_path(monkeypatch, mock_env, mock_blob...
  function test_download_blob_with_user_oid (line 369) | async def test_download_blob_with_user_oid(monkeypatch, mock_env, blob_m...
  function test_download_blob_properties_none (line 377) | async def test_download_blob_properties_none(monkeypatch, mock_env, mock...
  function test_adls_download_blob_permission_denied (line 401) | async def test_adls_download_blob_permission_denied(monkeypatch, mock_en...
  function test_adls_download_blob_with_permission (line 419) | async def test_adls_download_blob_with_permission(

FILE: tests/test_chatapproach.py
  function mock_search (line 31) | async def mock_search(*args, **kwargs):
  function mock_retrieval (line 35) | async def mock_retrieval(*args, **kwargs):
  function test_get_search_query (line 39) | def test_get_search_query(chat_approach):
  function test_get_search_query_returns_default (line 106) | def test_get_search_query_returns_default(chat_approach):
  function test_get_search_query_returns_default_on_error (line 115) | def test_get_search_query_returns_default_on_error(chat_approach, monkey...
  function test_extract_rewritten_query_invalid_json (line 127) | def test_extract_rewritten_query_invalid_json(chat_approach):
  function test_extract_followup_questions (line 158) | def test_extract_followup_questions(chat_approach):
  function test_extract_followup_questions_three (line 165) | def test_extract_followup_questions_three(chat_approach):
  function test_extract_followup_questions_no_followup (line 180) | def test_extract_followup_questions_no_followup(chat_approach):
  function test_extract_followup_questions_no_pre_content (line 187) | def test_extract_followup_questions_no_pre_content(chat_approach):
  function test_search_results_filtering_by_scores (line 207) | async def test_search_results_filtering_by_scores(
  function test_search_results_query_rewriting (line 231) | async def test_search_results_query_rewriting(chat_approach, monkeypatch):
  function test_compute_multimodal_embedding (line 258) | async def test_compute_multimodal_embedding(monkeypatch, chat_approach):
  function test_compute_multimodal_embedding_no_client (line 282) | async def test_compute_multimodal_embedding_no_client():
  function test_chat_prompt_render_with_image_directive (line 313) | async def test_chat_prompt_render_with_image_directive(chat_approach):
  function test_get_sources_content_downloads_images_from_images_container (line 366) | async def test_get_sources_content_downloads_images_from_images_containe...
  function test_replace_all_ref_ids_unknown_fallback (line 401) | def test_replace_all_ref_ids_unknown_fallback(chat_approach):
  function test_replace_all_ref_ids_mixed (line 428) | def test_replace_all_ref_ids_mixed(chat_approach):
  function test_replace_all_ref_ids_sharepoint_priority (line 459) | def test_replace_all_ref_ids_sharepoint_priority(chat_approach):
  function test_get_sources_content_includes_sharepoint (line 477) | async def test_get_sources_content_includes_sharepoint(chat_approach):
  function test_select_knowledgebase_client_priorities (line 508) | def test_select_knowledgebase_client_priorities(chat_approach):
  function test_select_knowledgebase_client_requires_configuration (line 535) | def test_select_knowledgebase_client_requires_configuration(chat_approach):
  function test_run_with_streaming_handles_non_stream_response (line 545) | async def test_run_with_streaming_handles_non_stream_response(chat_appro...
  function test_run_until_final_call_rejects_web_streaming (line 590) | async def test_run_until_final_call_rejects_web_streaming(chat_approach):

FILE: tests/test_content_file.py
  function test_content_file (line 25) | async def test_content_file(monkeypatch, mock_env, mock_acs_search, mock...
  function test_content_file_useruploaded_found (line 84) | async def test_content_file_useruploaded_found(
  function test_content_file_useruploaded_notfound (line 134) | async def test_content_file_useruploaded_notfound(

FILE: tests/test_cosmosdb.py
  class MockCosmosDBResultsIterator (line 77) | class MockCosmosDBResultsIterator:
    method __init__ (line 78) | def __init__(self, data=[]):
    method __aiter__ (line 81) | def __aiter__(self):
    method __anext__ (line 84) | async def __anext__(self):
    method get_count (line 89) | async def get_count(self):
    method by_page (line 92) | def by_page(self, continuation_token=None):
  function test_chathistory_newitem (line 101) | async def test_chathistory_newitem(auth_public_documents_client, monkeyp...
  function test_chathistory_newitem_error_disabled (line 136) | async def test_chathistory_newitem_error_disabled(client, monkeypatch):
  function test_chathistory_newitem_error_container (line 150) | async def test_chathistory_newitem_error_container(auth_public_documents...
  function test_chathistory_newitem_error_entra (line 164) | async def test_chathistory_newitem_error_entra(auth_public_documents_cli...
  function test_chathistory_newitem_error_runtime (line 176) | async def test_chathistory_newitem_error_runtime(auth_public_documents_c...
  function test_chathistory_query (line 198) | async def test_chathistory_query(auth_public_documents_client, monkeypat...
  function test_chathistory_query_continuation (line 214) | async def test_chathistory_query_continuation(auth_public_documents_clie...
  function test_chathistory_query_error_disabled (line 230) | async def test_chathistory_query_error_disabled(client, monkeypatch):
  function test_chathistory_query_error_container (line 237) | async def test_chathistory_query_error_container(auth_public_documents_c...
  function test_chathistory_query_error_entra (line 246) | async def test_chathistory_query_error_entra(auth_public_documents_clien...
  function test_chathistory_query_error_runtime (line 252) | async def test_chathistory_query_error_runtime(auth_public_documents_cli...
  function test_chathistory_getitem (line 270) | async def test_chathistory_getitem(auth_public_documents_client, monkeyp...
  function test_chathistory_getitem_error_disabled (line 288) | async def test_chathistory_getitem_error_disabled(client, monkeypatch):
  function test_chathistory_getitem_error_container (line 298) | async def test_chathistory_getitem_error_container(auth_public_documents...
  function test_chathistory_getitem_error_entra (line 308) | async def test_chathistory_getitem_error_entra(auth_public_documents_cli...
  function test_chathistory_getitem_error_runtime (line 316) | async def test_chathistory_getitem_error_runtime(auth_public_documents_c...
  function test_chathistory_deleteitem (line 332) | async def test_chathistory_deleteitem(auth_public_documents_client, monk...
  function test_chathistory_deleteitem_error_disabled (line 360) | async def test_chathistory_deleteitem_error_disabled(client, monkeypatch):
  function test_chathistory_deleteitem_error_container (line 370) | async def test_chathistory_deleteitem_error_container(auth_public_docume...
  function test_chathistory_deleteitem_error_entra (line 380) | async def test_chathistory_deleteitem_error_entra(auth_public_documents_...
  function test_chathistory_deleteitem_error_runtime (line 388) | async def test_chathistory_deleteitem_error_runtime(auth_public_document...

FILE: tests/test_cosmosdb_migration.py
  class MockAsyncPageIterator (line 41) | class MockAsyncPageIterator:
    method __init__ (line 44) | def __init__(self, items):
    method __aiter__ (line 47) | def __aiter__(self):
    method __anext__ (line 50) | async def __anext__(self):
  class MockCosmosDBResultsIterator (line 56) | class MockCosmosDBResultsIterator:
    method __init__ (line 59) | def __init__(self, data=[]):
    method by_page (line 63) | def by_page(self, continuation_token=None):
  class MockPagesAsyncIterator (line 70) | class MockPagesAsyncIterator:
    method __init__ (line 73) | def __init__(self, data):
    method __aiter__ (line 77) | def __aiter__(self):
    method __anext__ (line 80) | async def __anext__(self):
  function test_migrate_method (line 88) | async def test_migrate_method():
  function test_migrate_cosmosdb_data (line 171) | async def test_migrate_cosmosdb_data(monkeypatch):

FILE: tests/test_csvparser.py
  function test_csvparser_single_row (line 9) | async def test_csvparser_single_row():
  function test_csvparser_multiple_rows (line 26) | async def test_csvparser_multiple_rows():
  function test_csvparser_empty_file (line 47) | async def test_csvparser_empty_file():

FILE: tests/test_function_apps.py
  class ChunkStub (line 22) | class ChunkStub:
  class SectionStub (line 29) | class SectionStub:
  function build_request (line 33) | def build_request(payload: dict[str, Any]) -> func.HttpRequest:
  function build_raw_request (line 45) | def build_raw_request(body: bytes) -> func.HttpRequest:
  function test_document_extractor_emits_pages_and_figures (line 57) | async def test_document_extractor_emits_pages_and_figures(monkeypatch: p...
  function test_document_extractor_with_adls_acls (line 138) | async def test_document_extractor_with_adls_acls(monkeypatch: pytest.Mon...
  function test_document_extractor_requires_single_record (line 212) | async def test_document_extractor_requires_single_record(monkeypatch: py...
  function test_document_extractor_handles_processing_exception (line 231) | async def test_document_extractor_handles_processing_exception(monkeypat...
  function test_document_extractor_invalid_json_returns_error (line 266) | async def test_document_extractor_invalid_json_returns_error() -> None:
  function test_document_extractor_process_document_http_error (line 274) | async def test_document_extractor_process_document_http_error(monkeypatc...
  function test_document_extractor_managed_identity_reload (line 310) | def test_document_extractor_managed_identity_reload(monkeypatch: pytest....
  function test_figure_processor_returns_enriched_metadata (line 328) | async def test_figure_processor_returns_enriched_metadata(monkeypatch: p...
  function test_figure_processor_invalid_json_returns_error (line 382) | async def test_figure_processor_invalid_json_returns_error(monkeypatch: ...
  function test_figure_processor_initialisation_with_env (line 393) | def test_figure_processor_initialisation_with_env(monkeypatch: pytest.Mo...
  function test_figure_processor_warns_when_openai_incomplete (line 468) | def test_figure_processor_warns_when_openai_incomplete(monkeypatch: pyte...
  function test_text_processor_builds_chunk_with_caption (line 481) | async def test_text_processor_builds_chunk_with_caption(monkeypatch: pyt...
  function test_document_extractor_without_settings (line 573) | async def test_document_extractor_without_settings(monkeypatch: pytest.M...
  function test_document_extractor_module_init_key_error (line 595) | def test_document_extractor_module_init_key_error(
  function setup_acl_mocks (line 626) | def setup_acl_mocks(monkeypatch: pytest.MonkeyPatch, acl_string: str, en...
  function test_get_file_acls_extracts_user_oids (line 663) | async def test_get_file_acls_extracts_user_oids(monkeypatch: pytest.Monk...
  function test_get_file_acls_extracts_group_ids (line 674) | async def test_get_file_acls_extracts_group_ids(monkeypatch: pytest.Monk...
  function test_get_file_acls_ignores_entries_without_read_permission (line 685) | async def test_get_file_acls_ignores_entries_without_read_permission(mon...
  function test_get_file_acls_other_read_with_global_access_enabled (line 697) | async def test_get_file_acls_other_read_with_global_access_enabled(monke...
  function test_get_file_acls_other_read_with_global_access_disabled (line 710) | async def test_get_file_acls_other_read_with_global_access_disabled(monk...
  function test_get_file_acls_other_read_execute_with_global_access_enabled (line 724) | async def test_get_file_acls_other_read_execute_with_global_access_enabl...
  function test_get_file_acls_other_no_read_does_not_grant_global (line 735) | async def test_get_file_acls_other_no_read_does_not_grant_global(monkeyp...
  function test_get_file_acls_malformed_acl_entry (line 750) | async def test_get_file_acls_malformed_acl_entry(monkeypatch: pytest.Mon...
  function test_get_file_acls_handles_exception (line 767) | async def test_get_file_acls_handles_exception(monkeypatch: pytest.Monke...
  function test_get_file_acls_raises_without_settings (line 801) | async def test_get_file_acls_raises_without_settings(monkeypatch: pytest...
  function test_get_file_acls_mixed_users_and_groups (line 810) | async def test_get_file_acls_mixed_users_and_groups(monkeypatch: pytest....
  function test_figure_processor_without_settings (line 824) | async def test_figure_processor_without_settings(monkeypatch: pytest.Mon...
  function test_text_processor_without_settings (line 851) | async def test_text_processor_without_settings(monkeypatch: pytest.Monke...
  function test_text_processor_invalid_json (line 879) | async def test_text_processor_invalid_json(monkeypatch: pytest.MonkeyPat...
  function test_text_processor_with_client_id (line 900) | async def test_text_processor_with_client_id(monkeypatch: pytest.MonkeyP...
  function test_text_processor_embeddings_setup (line 911) | async def test_text_processor_embeddings_setup(monkeypatch: pytest.Monke...
  function test_text_processor_configure_logs_when_embedding_config_missing (line 951) | def test_text_processor_configure_logs_when_embedding_config_missing(
  function test_text_processor_no_sections (line 971) | async def test_text_processor_no_sections(monkeypatch: pytest.MonkeyPatc...
  function test_text_processor_embeddings_not_initialized (line 1019) | async def test_text_processor_embeddings_not_initialized(monkeypatch: py...
  function test_text_processor_empty_chunk_skipped (line 1064) | async def test_text_processor_empty_chunk_skipped(monkeypatch: pytest.Mo...
  function test_text_processor_with_multimodal_embeddings (line 1115) | async def test_text_processor_with_multimodal_embeddings(monkeypatch: py...
  function test_text_processor_embedding_dimension_mismatch (line 1178) | async def test_text_processor_embedding_dimension_mismatch(monkeypatch: ...
  function test_text_processor_embeddings_missing_warning (line 1230) | async def test_text_processor_embeddings_missing_warning(monkeypatch: py...
  function test_text_processor_process_document_handles_missing_figures (line 1284) | async def test_text_processor_process_document_handles_missing_figures(
  function test_text_processor_process_document_returns_empty_when_no_pages (line 1335) | async def test_text_processor_process_document_returns_empty_when_no_pag...
  function test_text_processor_includes_acls_when_enabled (line 1353) | async def test_text_processor_includes_acls_when_enabled(monkeypatch: py...
  function test_text_processor_includes_empty_acls_when_enabled_but_none_found (line 1420) | async def test_text_processor_includes_empty_acls_when_enabled_but_none_...
  function test_text_processor_excludes_acls_when_disabled (line 1482) | async def test_text_processor_excludes_acls_when_disabled(monkeypatch: p...
  function test_text_processor_module_init_logs_warning (line 1545) | def test_text_processor_module_init_logs_warning(

FILE: tests/test_htmlparser.py
  function test_htmlparser_remove_new_lines (line 9) | async def test_htmlparser_remove_new_lines():
  function test_htmlparser_remove_white_spaces (line 21) | async def test_htmlparser_remove_white_spaces():
  function test_htmlparser_remove_hyphens (line 33) | async def test_htmlparser_remove_hyphens():
  function test_htmlparser_full (line 45) | async def test_htmlparser_full():

FILE: tests/test_jsonparser.py
  function test_jsonparser_single_obj (line 9) | async def test_jsonparser_single_obj():
  function test_jsonparser_array_multiple_obj (line 21) | async def test_jsonparser_array_multiple_obj():

FILE: tests/test_listfilestrategy.py
  function test_file_filename (line 14) | def test_file_filename():
  function test_file_file_extension (line 20) | def test_file_file_extension():
  function test_file_contextmanager (line 26) | def test_file_contextmanager():
  function test_file_filename_to_id (line 35) | def test_file_filename_to_id():
  function test_file_filename_to_id_acls (line 48) | def test_file_filename_to_id_acls():
  function test_locallistfilestrategy (line 60) | async def test_locallistfilestrategy():
  function test_locallistfilestrategy_nesteddir (line 89) | async def test_locallistfilestrategy_nesteddir():
  function test_locallistfilestrategy_checkmd5 (line 118) | def test_locallistfilestrategy_checkmd5():
  function test_locallistfilestrategy_global (line 136) | async def test_locallistfilestrategy_global():

FILE: tests/test_manageacl.py
  class AsyncSearchResultsIterator (line 18) | class AsyncSearchResultsIterator:
    method __init__ (line 19) | def __init__(self, results):
    method __aiter__ (line 23) | def __aiter__(self):
    method __anext__ (line 26) | async def __anext__(self):
    method by_page (line 33) | def by_page(self):
  class AsyncPageIterator (line 37) | class AsyncPageIterator:
    method __init__ (line 38) | def __init__(self, pages):
    method __aiter__ (line 42) | def __aiter__(self):
    method __anext__ (line 45) | async def __anext__(self):
  class AsyncPageContent (line 53) | class AsyncPageContent:
    method __init__ (line 54) | def __init__(self, items):
    method __aiter__ (line 58) | def __aiter__(self):
    method __anext__ (line 61) | async def __anext__(self):
  function test_view_acl (line 70) | async def test_view_acl(monkeypatch, capsys):
  function test_remove_acl (line 93) | async def test_remove_acl(monkeypatch, capsys):
  function test_remove_all_acl (line 127) | async def test_remove_all_acl(monkeypatch, capsys):
  function test_add_acl (line 161) | async def test_add_acl(monkeypatch, caplog):
  function test_update_storage_urls (line 212) | async def test_update_storage_urls(monkeypatch, caplog):
  function test_enable_global_access (line 250) | async def test_enable_global_access(monkeypatch, caplog):
  function test_enable_acls_with_missing_fields (line 298) | async def test_enable_acls_with_missing_fields(monkeypatch, capsys):
  function test_enable_acls_without_missing_fields (line 326) | async def test_enable_acls_without_missing_fields(monkeypatch, capsys):
  function validate_index (line 367) | def validate_index(index):

FILE: tests/test_mediadescriber.py
  function test_contentunderstanding_analyze (line 19) | async def test_contentunderstanding_analyze(monkeypatch, caplog):
  class MockAsyncOpenAI (line 144) | class MockAsyncOpenAI:
    method __init__ (line 145) | def __init__(self, test_response):
  class MockChatCompletions (line 150) | class MockChatCompletions:
    method __init__ (line 151) | def __init__(self, test_response):
    method create (line 155) | async def create(self, *args, **kwargs):
  function test_multimodal_model_describer (line 168) | async def test_multimodal_model_describer(monkeypatch, model, deployment...
  function test_multimodal_model_describer_empty_response (line 229) | async def test_multimodal_model_describer_empty_response(monkeypatch):

FILE: tests/test_pdfparser.py
  function sample_image (line 42) | def sample_image():
  function assert_image_equal (line 54) | def assert_image_equal(image1, image2):
  function test_crop_image_from_pdf_page (line 64) | def test_crop_image_from_pdf_page():
  function test_table_to_html (line 88) | def test_table_to_html():
  function test_table_to_html_with_spans (line 111) | def test_table_to_html_with_spans():
  function test_process_figure_without_bounding_regions (line 134) | async def test_process_figure_without_bounding_regions():
  function test_process_figure_with_bounding_regions (line 148) | async def test_process_figure_with_bounding_regions(monkeypatch, caplog):
  function test_parse_simple (line 181) | async def test_parse_simple(monkeypatch):
  function test_parse_with_filestorage (line 219) | async def test_parse_with_filestorage(monkeypatch):
  function test_parse_with_non_seekable_stream (line 257) | async def test_parse_with_non_seekable_stream(monkeypatch):
  function test_parse_doc_with_tables (line 307) | async def test_parse_doc_with_tables(monkeypatch):
  function test_parse_doc_with_figures (line 387) | async def test_parse_doc_with_figures(monkeypatch):
  function test_parse_unsupportedformat (line 443) | async def test_parse_unsupportedformat(monkeypatch, caplog):
  function test_figure_processor_openai_requires_client (line 507) | async def test_figure_processor_openai_requires_client():
  function test_figure_processor_openai_describe (line 515) | async def test_figure_processor_openai_describe(monkeypatch):
  function test_figure_processor_content_understanding_initializes_once (line 538) | async def test_figure_processor_content_understanding_initializes_once(m...
  function test_figure_processor_none_strategy_returns_none (line 566) | async def test_figure_processor_none_strategy_returns_none():
  function test_figure_processor_content_understanding_missing_endpoint (line 577) | async def test_figure_processor_content_understanding_missing_endpoint():
  function test_figure_processor_content_understanding_missing_credential (line 588) | async def test_figure_processor_content_understanding_missing_credential():
  function test_figure_processor_content_understanding_key_credential (line 599) | async def test_figure_processor_content_understanding_key_credential():
  function test_figure_processor_openai_returns_describer (line 611) | async def test_figure_processor_openai_returns_describer(monkeypatch):
  function test_figure_processor_unknown_strategy (line 630) | async def test_figure_processor_unknown_strategy(caplog):
  function test_figure_processor_mark_content_understanding_ready (line 644) | async def test_figure_processor_mark_content_understanding_ready():
  function test_build_figure_markup_without_description (line 653) | async def test_build_figure_markup_without_description(sample_image):
  function test_process_page_image_without_blob_manager (line 661) | async def test_process_page_image_without_blob_manager(sample_image):
  function test_process_page_image_without_figure_processor (line 672) | async def test_process_page_image_without_figure_processor(sample_image):
  function test_process_page_image_sets_description (line 691) | async def test_process_page_image_sets_description(sample_image):
  function test_process_page_image_skips_upload_if_url_exists (line 712) | async def test_process_page_image_skips_upload_if_url_exists(sample_image):
  function test_process_page_image_with_embeddings (line 731) | async def test_process_page_image_with_embeddings(sample_image):
  function test_image_on_page_from_skill_payload_without_bytes (line 750) | def test_image_on_page_from_skill_payload_without_bytes():
  function test_image_on_page_from_skill_payload_invalid_page_num (line 770) | def test_image_on_page_from_skill_payload_invalid_page_num():
  function test_image_on_page_from_skill_payload_invalid_bbox (line 784) | def test_image_on_page_from_skill_payload_invalid_bbox():

FILE: tests/test_prepdocs.py
  class MockEmbeddingsClient (line 21) | class MockEmbeddingsClient:
    method __init__ (line 22) | def __init__(self, create_embedding_response: openai.types.CreateEmbed...
    method create (line 25) | async def create(self, *args, **kwargs) -> openai.types.CreateEmbeddin...
  class MockClient (line 29) | class MockClient:
    method __init__ (line 30) | def __init__(self, embeddings_client):
  function test_compute_embedding_success (line 35) | async def test_compute_embedding_success():
  function fake_response (line 82) | def fake_response(http_code):
  class RateLimitMockEmbeddingsClient (line 86) | class RateLimitMockEmbeddingsClient:
    method create (line 87) | async def create(self, *args, **kwargs) -> openai.types.CreateEmbeddin...
  function create_rate_limit_client (line 93) | async def create_rate_limit_client(*args, **kwargs):
  function test_compute_embedding_ratelimiterror_batch (line 98) | async def test_compute_embedding_ratelimiterror_batch(monkeypatch, caplog):
  function test_compute_embedding_ratelimiterror_single (line 116) | async def test_compute_embedding_ratelimiterror_single(monkeypatch, capl...
  class AuthenticationErrorMockEmbeddingsClient (line 133) | class AuthenticationErrorMockEmbeddingsClient:
    method create (line 134) | async def create(self, *args, **kwargs) -> openai.types.CreateEmbeddin...
  function test_compute_embedding_autherror (line 139) | async def test_compute_embedding_autherror(monkeypatch):
  function test_image_embeddings_success (line 164) | async def test_image_embeddings_success(mock_azurehttp_calls):
  function test_openai_embeddings_use_deployment_for_azure_model (line 194) | async def test_openai_embeddings_use_deployment_for_azure_model():
  function test_manageacl_main_uses_search_key (line 229) | async def test_manageacl_main_uses_search_key(monkeypatch: pytest.Monkey...

FILE: tests/test_prepdocslib_filestrategy.py
  function test_parse_file_with_images (line 23) | async def test_parse_file_with_images(monkeypatch):
  function test_file_strategy_setup_with_content_understanding (line 89) | async def test_file_strategy_setup_with_content_understanding(monkeypatc...

FILE: tests/test_prepdocslib_textsplitter.py
  function test_sentencetextsplitter_split_empty_pages (line 28) | def test_sentencetextsplitter_split_empty_pages():
  function test_sentencetextsplitter_split_small_pages (line 34) | def test_sentencetextsplitter_split_small_pages():
  function test_sentencetextsplitter_list_parse_and_split (line 44) | async def test_sentencetextsplitter_list_parse_and_split(tmp_path, snaps...
  function test_simpletextsplitter_split_empty_pages (line 69) | def test_simpletextsplitter_split_empty_pages():
  function test_simpletextsplitter_split_small_pages (line 75) | def test_simpletextsplitter_split_small_pages():
  function test_sentencetextsplitter_split_pages (line 84) | def test_sentencetextsplitter_split_pages():
  function pytest_generate_tests (line 101) | def pytest_generate_tests(metafunc):
  function test_sentencetextsplitter_multilang (line 108) | async def test_sentencetextsplitter_multilang(test_doc, tmp_path):
  function test_split_tables (line 142) | def test_split_tables():
  function test_pages_with_figures (line 180) | def test_pages_with_figures(snapshot, file_name):
  function test_large_figure_not_split (line 200) | def test_large_figure_not_split():
  function test_figure_at_start_emitted (line 221) | def test_figure_at_start_emitted():
  function test_unbalanced_figure_treated_as_text (line 234) | def test_unbalanced_figure_treated_as_text():
  function test_oversize_single_sentence_recursion (line 248) | def test_oversize_single_sentence_recursion():
  function test_sentence_boundary_fallback_half_split (line 260) | def test_sentence_boundary_fallback_half_split():
  function test_cross_page_merge_mid_sentence (line 269) | def test_cross_page_merge_mid_sentence():
  function test_normalization_trims_leading_space_overflow (line 280) | def test_normalization_trims_leading_space_overflow():
  function test_split_page_by_max_tokens_merges_heading_with_figure (line 295) | def test_split_page_by_max_tokens_merges_heading_with_figure():
  function test_recursive_split_uses_sentence_boundary (line 306) | def test_recursive_split_uses_sentence_boundary():
  function test_cross_page_merge_fragment_shift_no_sentence_end (line 321) | def test_cross_page_merge_fragment_shift_no_sentence_end():
  function test_cross_page_merge_fragment_shift_with_sentence_end_and_shortening (line 338) | def test_cross_page_merge_fragment_shift_with_sentence_end_and_shortenin...
  function test_cross_page_merge_fragment_shift_hard_trim (line 362) | def test_cross_page_merge_fragment_shift_hard_trim():
  function test_figure_merge_both_branches (line 382) | def test_figure_merge_both_branches():
  function test_sentence_boundary_right_side (line 411) | def test_sentence_boundary_right_side():
  function test_sentence_boundary_left_side (line 425) | def test_sentence_boundary_left_side():
  function test_sentence_boundary_left_midpoint_exact (line 438) | def test_sentence_boundary_left_midpoint_exact():
  function test_recursive_split_prefers_word_break_over_overlap (line 450) | def test_recursive_split_prefers_word_break_over_overlap():
  function test_recursive_split_overlap_fallback_when_no_word_breaks (line 471) | def test_recursive_split_overlap_fallback_when_no_word_breaks():
  function test_fragment_shift_token_limit_fits_false (line 493) | def test_fragment_shift_token_limit_fits_false():
  function test_fragment_shift_token_limit_single_token_char (line 513) | def test_fragment_shift_token_limit_single_token_char():
  function test_safe_concat_html_tag_boundary (line 533) | def test_safe_concat_html_tag_boundary():
  function test_normalization_trims_trailing_space_overflow (line 546) | def test_normalization_trims_trailing_space_overflow():
  function test_cross_page_fragment_shortening_path (line 567) | def test_cross_page_fragment_shortening_path():
  function test_cross_page_fragment_hard_trim_iterative (line 582) | def test_cross_page_fragment_hard_trim_iterative():
  function test_intra_page_semantic_overlap_applied (line 597) | def test_intra_page_semantic_overlap_applied():
  function test_no_overlap_after_figure_previous (line 623) | def test_no_overlap_after_figure_previous():
  function test_append_overlap_preserves_next_chunk_start (line 644) | def test_append_overlap_preserves_next_chunk_start():

FILE: tests/test_searchmanager.py
  function search_info (line 35) | def search_info():
  function test_create_index_doesnt_exist_yet (line 44) | async def test_create_index_doesnt_exist_yet(monkeypatch, search_info):
  function test_create_index_using_int_vectorization (line 65) | async def test_create_index_using_int_vectorization(monkeypatch, search_...
  function test_create_index_does_exist (line 90) | async def test_create_index_does_exist(monkeypatch, search_info):
  function test_create_index_add_field (line 127) | async def test_create_index_add_field(monkeypatch, search_info):
  function test_create_index_adds_vectorizer_to_existing_index (line 160) | async def test_create_index_adds_vectorizer_to_existing_index(monkeypatc...
  function test_create_index_acls (line 227) | async def test_create_index_acls(monkeypatch, search_info):
  function test_create_index_acls_no_enforcement (line 260) | async def test_create_index_acls_no_enforcement(monkeypatch, search_info):
  function test_create_index_acls_no_existing_fields (line 293) | async def test_create_index_acls_no_existing_fields(monkeypatch, search_...
  function test_create_index_acls_no_existing_fields_no_enforcement (line 343) | async def test_create_index_acls_no_existing_fields_no_enforcement(monke...
  function test_create_index_acls_with_existing_fields (line 393) | async def test_create_index_acls_with_existing_fields(monkeypatch, searc...
  function test_create_index_acls_with_existing_fields_no_enforcement (line 453) | async def test_create_index_acls_with_existing_fields_no_enforcement(mon...
  function test_update_content (line 513) | async def test_update_content(monkeypatch, search_info):
  function test_update_content_many (line 545) | async def test_update_content_many(monkeypatch, search_info):
  function test_update_content_with_embeddings (line 580) | async def test_update_content_with_embeddings(monkeypatch, search_info):
  function test_update_content_no_images_when_disabled (line 644) | async def test_update_content_no_images_when_disabled(monkeypatch, searc...
  function test_update_content_with_images_when_enabled (line 673) | async def test_update_content_with_images_when_enabled(monkeypatch, sear...
  class AsyncSearchResultsIterator (line 721) | class AsyncSearchResultsIterator:
    method __init__ (line 722) | def __init__(self, results):
    method __aiter__ (line 725) | def __aiter__(self):
    method __anext__ (line 728) | async def __anext__(self):
    method get_count (line 733) | async def get_count(self):
  function test_remove_content (line 738) | async def test_remove_content(monkeypatch, search_info):
  function test_remove_content_no_docs (line 780) | async def test_remove_content_no_docs(monkeypatch, search_info):
  function test_remove_content_only_oid (line 804) | async def test_remove_content_only_oid(monkeypatch, search_info):
  function test_remove_content_no_inf_loop (line 864) | async def test_remove_content_no_inf_loop(monkeypatch, search_info):
  function test_create_index_with_search_images (line 904) | async def test_create_index_with_search_images(monkeypatch, search_info):
  function test_create_index_with_search_images_no_endpoint (line 965) | async def test_create_index_with_search_images_no_endpoint(monkeypatch, ...
  function test_create_index_with_search_images_and_embeddings (line 984) | async def test_create_index_with_search_images_and_embeddings(monkeypatc...
  function test_create_knowledgebase_field_names_with_acls_and_images (line 1062) | async def test_create_knowledgebase_field_names_with_acls_and_images(mon...
  function test_create_knowledgebase_with_sharepoint_source (line 1141) | async def test_create_knowledgebase_with_sharepoint_source(monkeypatch, ...
  function test_create_knowledgebase_with_web_and_sharepoint_sources (line 1212) | async def test_create_knowledgebase_with_web_and_sharepoint_sources(monk...

FILE: tests/test_sentencetextsplitter.py
  function test_sentence_text_splitter_initializes_overlap_correctly (line 18) | def test_sentence_text_splitter_initializes_overlap_correctly(

FILE: tests/test_servicesetup.py
  function test_setup_blob_manager_respects_storage_key (line 31) | def test_setup_blob_manager_respects_storage_key(monkeypatch: pytest.Mon...
  function test_setup_embeddings_service_populates_azure_metadata (line 71) | def test_setup_embeddings_service_populates_azure_metadata() -> None:
  function test_setup_embeddings_service_requires_endpoint_for_azure (line 95) | def test_setup_embeddings_service_requires_endpoint_for_azure() -> None:
  function test_setup_embeddings_service_requires_deployment_for_azure (line 116) | def test_setup_embeddings_service_requires_deployment_for_azure() -> None:
  function test_setup_openai_client_azure_constructs_endpoint_correctly (line 137) | def test_setup_openai_client_azure_constructs_endpoint_correctly(monkeyp...
  function test_setup_openai_client_azure_custom_uses_custom_url (line 162) | def test_setup_openai_client_azure_custom_uses_custom_url(monkeypatch: p...
  function test_setup_openai_client_azure_respects_api_key (line 185) | def test_setup_openai_client_azure_respects_api_key(monkeypatch: pytest....
  function test_setup_openai_client_openai_requires_api_key (line 205) | def test_setup_openai_client_openai_requires_api_key() -> None:
  function test_setup_openai_client_azure_requires_service (line 215) | def test_setup_openai_client_azure_requires_service() -> None:
  function test_setup_openai_client_azure_custom_requires_url (line 225) | def test_setup_openai_client_azure_custom_requires_url() -> None:
  function test_setup_search_info_agentic_retrieval_without_model (line 235) | def test_setup_search_info_agentic_retrieval_without_model():
  function test_setup_image_embeddings_multimodal_without_vision (line 247) | def test_setup_image_embeddings_multimodal_without_vision():
  function test_setup_figure_processor_content_understanding (line 257) | def test_setup_figure_processor_content_understanding():
  function test_build_file_processors_with_document_intelligence_key (line 273) | def test_build_file_processors_with_document_intelligence_key():
  function test_build_file_processors_text_files (line 287) | def test_build_file_processors_text_files():
  function test_build_file_processors_with_di_enables_office_formats (line 300) | def test_build_file_processors_with_di_enables_office_formats():
  function test_build_file_processors_without_di_excludes_office_formats (line 313) | def test_build_file_processors_without_di_excludes_office_formats():
  function test_clean_key_if_exists_handles_whitespace (line 325) | def test_clean_key_if_exists_handles_whitespace() -> None:
  function test_build_file_processors_logs_when_no_parsers (line 331) | def test_build_file_processors_logs_when_no_parsers(
  function test_select_processor_for_filename_raises_when_unknown (line 351) | def test_select_processor_for_filename_raises_when_unknown() -> None:

FILE: tests/test_textparser.py
  function test_textparser_remove_new_lines (line 9) | async def test_textparser_remove_new_lines():
  function test_textparser_remove_white_spaces (line 27) | async def test_textparser_remove_white_spaces():
  function test_textparser_full (line 35) | async def test_textparser_full():

FILE: tests/test_textprocessor.py
  function test_combine_text_with_figures_no_description (line 5) | def test_combine_text_with_figures_no_description():
  function test_combine_text_with_figures_placeholder_not_found (line 27) | def test_combine_text_with_figures_placeholder_not_found(caplog):
  function test_combine_text_with_figures_replaces_successfully (line 50) | def test_combine_text_with_figures_replaces_successfully():

FILE: tests/test_upload.py
  function test_upload_file (line 16) | async def test_upload_file(auth_client, monkeypatch, mock_data_lake_serv...
  function test_upload_file_error_wrong_directory_owner (line 91) | async def test_upload_file_error_wrong_directory_owner(auth_client, monk...
  function test_list_uploaded (line 123) | async def test_list_uploaded(auth_client, monkeypatch, mock_data_lake_se...
  function test_list_uploaded_nopaths (line 130) | async def test_list_uploaded_nopaths(auth_client, monkeypatch, mock_data...
  function test_delete_uploaded (line 156) | async def test_delete_uploaded(auth_client, monkeypatch, mock_data_lake_...
Condensed preview — 471 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (6,218K chars).
[
  {
    "path": ".azdo/pipelines/azure-dev.yml",
    "chars": 7472,
    "preview": "# Run when commits are pushed to mainline branch (main or master)\n# Set this to the mainline branch you are using\ntrigge"
  },
  {
    "path": ".devcontainer/devcontainer.json",
    "chars": 1037,
    "preview": "{\n    \"name\": \"Azure Search OpenAI Demo\",\n    \"image\": \"mcr.microsoft.com/devcontainers/python:3.13-bookworm\",\n    \"feat"
  },
  {
    "path": ".gitattributes",
    "chars": 41,
    "preview": "*.sh text eol=lf\n*.jsonlines text eol=lf\n"
  },
  {
    "path": ".github/CODE_OF_CONDUCT.md",
    "chars": 444,
    "preview": "# Microsoft Open Source Code of Conduct\n\nThis project has adopted the [Microsoft Open Source Code of Conduct](https://op"
  },
  {
    "path": ".github/ISSUE_TEMPLATE.md",
    "chars": 966,
    "preview": "<!--\nIF SUFFICIENT INFORMATION IS NOT PROVIDED VIA THE FOLLOWING TEMPLATE THE ISSUE MIGHT BE CLOSED WITHOUT FURTHER CONS"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "chars": 1545,
    "preview": "## Purpose\n\n<!-- Describe the intention of the changes being proposed. What problem does it solve or functionality does "
  },
  {
    "path": ".github/agents/fixer.agent.md",
    "chars": 3232,
    "preview": "---\ndescription: 'Fix and verify issues in app'\ntools: ['vscode', 'execute', 'read', 'edit', 'search', 'web', 'agent', '"
  },
  {
    "path": ".github/agents/triager.agent.md",
    "chars": 3718,
    "preview": "---\ndescription: 'Triage old stale issues for obsolescence and recommend closures'\ntools: ['edit', 'search/usages', 'web"
  },
  {
    "path": ".github/dependabot.yaml",
    "chars": 1130,
    "preview": "version: 2\nupdates:\n\n  # Maintain dependencies for GitHub Actions\n  - package-ecosystem: \"github-actions\"\n    directory:"
  },
  {
    "path": ".github/instructions/bicep.instructions.md",
    "chars": 3264,
    "preview": "---\ndescription: 'Infrastructure as Code with Bicep'\napplyTo: '**/*.bicep'\n---\n\n# Bicep best-practices\nThis list of best"
  },
  {
    "path": ".github/prompts/review_pr_comments.prompt.md",
    "chars": 1472,
    "preview": "---\nagent: agent\n---\nWe have received comments on the current active pull request. Together, we will go through each com"
  },
  {
    "path": ".github/skills/github-pr-inline-reply/SKILL.md",
    "chars": 3327,
    "preview": "---\nname: github-pr-inline-reply\ndescription: Reply to inline PR review comments on GitHub pull requests using the GitHu"
  },
  {
    "path": ".github/workflows/azure-dev-validation.yaml",
    "chars": 1498,
    "preview": "name: Validate AZD template\non:\n  push:\n    branches: [ main ]\n    paths:\n      - \"infra/**\"\n  pull_request:\n    branche"
  },
  {
    "path": ".github/workflows/azure-dev.yml",
    "chars": 8479,
    "preview": "name: Deploy\n\non:\n  workflow_dispatch:\n  push:\n    # Run when commits are pushed to mainline branch (main or master)\n   "
  },
  {
    "path": ".github/workflows/evaluate.yaml",
    "chars": 10975,
    "preview": "name: Evaluate RAG answer flow\n\non:\n  issue_comment:\n    types: [created]\n\n# Set up permissions for deploying with secre"
  },
  {
    "path": ".github/workflows/frontend.yaml",
    "chars": 446,
    "preview": "name: Frontend linting\n\non:\n  push:\n    branches: [ main ]\n    paths:\n      - \"app/frontend/**\"\n  pull_request:\n    bran"
  },
  {
    "path": ".github/workflows/lint-markdown.yml",
    "chars": 498,
    "preview": "name: Validate Markdown\n\non:\n  pull_request:\n    branches:\n      - main\n    paths:\n      - '**.md'\n\njobs:\n  lint-markdow"
  },
  {
    "path": ".github/workflows/nightly-jobs.yaml",
    "chars": 150,
    "preview": "name: Nightly Jobs\n\non:\n  schedule:\n    - cron: '0 0 * * *'\n  workflow_dispatch:\n\njobs:\n  python-test:\n    uses: ./.gith"
  },
  {
    "path": ".github/workflows/python-test.yaml",
    "chars": 2850,
    "preview": "name: Python check\n\non:\n  push:\n    branches: [ main ]\n    paths-ignore:\n      - \"**.md\"\n      - \".azdo/**\"\n      - \".de"
  },
  {
    "path": ".github/workflows/stale-bot.yml",
    "chars": 857,
    "preview": "name: 'Close stale issues and PRs'\non:\n  schedule:\n    - cron: '30 1 * * *'\n\njobs:\n  stale:\n    runs-on: ubuntu-latest\n "
  },
  {
    "path": ".github/workflows/validate-markdown.yml",
    "chars": 2056,
    "preview": "name: Validate Markdown\n\non:\n  # Trigger the workflow on pull request\n  pull_request_target:\n    branches:\n      - main\n"
  },
  {
    "path": ".gitignore",
    "chars": 2265,
    "preview": "# Azure az webapp deployment details\n.azure\n*_env\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.c"
  },
  {
    "path": ".markdownlint-cli2.jsonc",
    "chars": 188,
    "preview": "{\n    \"config\": {\n        \"default\": true,\n        \"line-length\": false,\n        \"table-column-style\": false,\n        \"M"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 541,
    "preview": "exclude: '^tests/snapshots/'\nrepos:\n-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v5.0.0\n    hooks:\n"
  },
  {
    "path": ".vscode/extensions.json",
    "chars": 235,
    "preview": "{\n    \"recommendations\": [\n        \"ms-azuretools.azure-dev\",\n        \"ms-azuretools.vscode-bicep\",\n        \"ms-python.p"
  },
  {
    "path": ".vscode/launch.json",
    "chars": 1806,
    "preview": "{\n    // Use IntelliSense to learn about possible attributes.\n    // Hover to view descriptions of existing attributes.\n"
  },
  {
    "path": ".vscode/settings.json",
    "chars": 1060,
    "preview": "{\n    \"python.languageServer\": \"None\", // Disabling due to ty using its own full-featured language server\n    \"[javascri"
  },
  {
    "path": ".vscode/tasks.json",
    "chars": 2757,
    "preview": "{\n    \"version\": \"2.0.0\",\n    \"tasks\": [\n        {\n            \"label\": \"Start App\",\n            \"type\": \"shell\",\n      "
  },
  {
    "path": "AGENTS.md",
    "chars": 12486,
    "preview": "# Instructions for Coding Agents\n\nThis file contains instructions for developers working on the Azure Search and OpenAI "
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 5295,
    "preview": "# Contributing\n\nThis project welcomes contributions and suggestions.  Most contributions require you to agree to a\nContr"
  },
  {
    "path": "LICENSE",
    "chars": 1070,
    "preview": "MIT License\n\nCopyright (c) 2023 Azure Samples\n\nPermission is hereby granted, free of charge, to any person obtaining a c"
  },
  {
    "path": "README.md",
    "chars": 19782,
    "preview": "<!--\n---\nname: RAG chat app with your data (Python)\ndescription: Chat with your domain data using Azure OpenAI and Azure"
  },
  {
    "path": "SECURITY.md",
    "chars": 2747,
    "preview": "<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->\n# Security\n\nMicrosoft takes the security of our software products and "
  },
  {
    "path": "app/backend/.dockerignore",
    "chars": 47,
    "preview": ".git\n__pycache__\n*.pyc\n*.pyo\n*.pyd\n.Python\nenv\n"
  },
  {
    "path": "app/backend/Dockerfile",
    "chars": 206,
    "preview": "FROM python:3.13-bookworm\n\nWORKDIR /app\n\nCOPY ./ /app\n\nRUN python -m pip install -r requirements.txt\n\nRUN python -m pip "
  },
  {
    "path": "app/backend/app.py",
    "chars": 38518,
    "preview": "import dataclasses\nimport io\nimport json\nimport logging\nimport mimetypes\nimport os\nimport time\nfrom collections.abc impo"
  },
  {
    "path": "app/backend/approaches/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "app/backend/approaches/approach.py",
    "chars": 42610,
    "preview": "import base64\nimport json\nimport re\nfrom abc import ABC\nfrom collections.abc import AsyncGenerator, Awaitable\nfrom datac"
  },
  {
    "path": "app/backend/approaches/chatreadretrieveread.py",
    "chars": 24617,
    "preview": "import re\nfrom collections.abc import AsyncGenerator, Awaitable\nfrom dataclasses import asdict\nfrom typing import Any, O"
  },
  {
    "path": "app/backend/approaches/promptmanager.py",
    "chars": 4152,
    "preview": "import json\nimport pathlib\nfrom typing import Any, cast\n\nfrom jinja2 import Environment, FileSystemLoader\nfrom openai.ty"
  },
  {
    "path": "app/backend/approaches/prompts/chat_answer.system.jinja2",
    "chars": 2096,
    "preview": "{% if override_prompt %}\n{{ override_prompt }}\n{% else %}\nAssistant helps the company employees with their questions abo"
  },
  {
    "path": "app/backend/approaches/prompts/chat_answer.user.jinja2",
    "chars": 161,
    "preview": "{{ user_query }}\n\n{% if text_sources is defined and text_sources %}\n\nSources:\n\n{% for text_source in text_sources %}\n{{ "
  },
  {
    "path": "app/backend/approaches/prompts/chat_query_rewrite_tools.json",
    "chars": 513,
    "preview": "[{\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"search_sources\",\n        \"description\": \"Retrieve sources "
  },
  {
    "path": "app/backend/approaches/prompts/query_rewrite.system.jinja2",
    "chars": 1103,
    "preview": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searchi"
  },
  {
    "path": "app/backend/chat_history/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "app/backend/chat_history/cosmosdb.py",
    "chars": 9573,
    "preview": "import os\nimport time\nfrom typing import Any\n\nfrom azure.cosmos.aio import ContainerProxy, CosmosClient\nfrom azure.ident"
  },
  {
    "path": "app/backend/config.py",
    "chars": 2388,
    "preview": "CONFIG_OPENAI_TOKEN = \"openai_token\"\nCONFIG_CREDENTIAL = \"azure_credential\"\nCONFIG_CHAT_APPROACH = \"chat_approach\"\nCONFI"
  },
  {
    "path": "app/backend/core/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "app/backend/core/authentication.py",
    "chars": 13675,
    "preview": "# Refactored from https://github.com/Azure-Samples/ms-identity-python-on-behalf-of\n\nimport base64\nimport json\nimport log"
  },
  {
    "path": "app/backend/core/sessionhelper.py",
    "chars": 342,
    "preview": "import uuid\nfrom typing import Optional\n\n\ndef create_session_id(\n    config_chat_history_cosmos_enabled: bool, config_ch"
  },
  {
    "path": "app/backend/custom_uvicorn_worker.py",
    "chars": 1220,
    "preview": "from uvicorn.workers import UvicornWorker\n\nlogconfig_dict = {\n    \"version\": 1,\n    \"disable_existing_loggers\": False,\n "
  },
  {
    "path": "app/backend/decorators.py",
    "chars": 1924,
    "preview": "import logging\nfrom collections.abc import Callable\nfrom functools import wraps\nfrom typing import Any, TypeVar, cast\n\nf"
  },
  {
    "path": "app/backend/error.py",
    "chars": 1193,
    "preview": "import logging\n\nfrom openai import APIError\nfrom quart import jsonify\n\nERROR_MESSAGE = \"\"\"The app encountered an error p"
  },
  {
    "path": "app/backend/gunicorn.conf.py",
    "chars": 523,
    "preview": "import multiprocessing\nimport os\n\nmax_requests = 1000\nmax_requests_jitter = 50\nlog_file = \"-\"\nbind = \"0.0.0.0\"\n\ntimeout "
  },
  {
    "path": "app/backend/load_azd_env.py",
    "chars": 1107,
    "preview": "import json\nimport logging\nimport os\nimport subprocess\n\nfrom dotenv import load_dotenv\n\nlogger = logging.getLogger(\"scri"
  },
  {
    "path": "app/backend/main.py",
    "chars": 345,
    "preview": "import os\n\nfrom app import create_app\nfrom load_azd_env import load_azd_env\n\n# WEBSITE_HOSTNAME is always set by App Ser"
  },
  {
    "path": "app/backend/prepdocs.py",
    "chars": 15010,
    "preview": "import argparse\nimport asyncio\nimport logging\nimport os\nfrom typing import Optional\n\nimport aiohttp\nfrom azure.core.cred"
  },
  {
    "path": "app/backend/prepdocslib/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "app/backend/prepdocslib/blobmanager.py",
    "chars": 23183,
    "preview": "import io\nimport logging\nimport os\nimport re\nfrom pathlib import Path\nfrom typing import IO, Any, Optional, TypedDict, c"
  },
  {
    "path": "app/backend/prepdocslib/cloudingestionstrategy.py",
    "chars": 18226,
    "preview": "\"\"\"Cloud ingestion strategy using Azure AI Search custom skills.\"\"\"\n\nimport logging\nfrom dataclasses import dataclass\nfr"
  },
  {
    "path": "app/backend/prepdocslib/csvparser.py",
    "chars": 1036,
    "preview": "import csv\nfrom collections.abc import AsyncGenerator\nfrom typing import IO\n\nfrom .page import Page\nfrom .parser import "
  },
  {
    "path": "app/backend/prepdocslib/embeddings.py",
    "chars": 8211,
    "preview": "import logging\nfrom abc import ABC\nfrom collections.abc import Awaitable, Callable\nfrom urllib.parse import urljoin\n\nimp"
  },
  {
    "path": "app/backend/prepdocslib/figureprocessor.py",
    "chars": 5963,
    "preview": "\"\"\"Utilities for describing and enriching figures extracted from documents.\"\"\"\n\nimport logging\nfrom enum import Enum\nfro"
  },
  {
    "path": "app/backend/prepdocslib/fileprocessor.py",
    "chars": 194,
    "preview": "from dataclasses import dataclass\n\nfrom .parser import Parser\nfrom .textsplitter import TextSplitter\n\n\n@dataclass(frozen"
  },
  {
    "path": "app/backend/prepdocslib/filestrategy.py",
    "chars": 8001,
    "preview": "import logging\nfrom typing import Optional\n\nfrom .blobmanager import AdlsBlobManager, BaseBlobManager, BlobManager\nfrom "
  },
  {
    "path": "app/backend/prepdocslib/htmlparser.py",
    "chars": 1493,
    "preview": "import logging\nimport re\nfrom collections.abc import AsyncGenerator\nfrom typing import IO\n\nfrom bs4 import BeautifulSoup"
  },
  {
    "path": "app/backend/prepdocslib/integratedvectorizerstrategy.py",
    "chars": 8340,
    "preview": "import logging\nfrom typing import Optional\n\nfrom azure.search.documents.indexes._generated.models import (\n    NativeBlo"
  },
  {
    "path": "app/backend/prepdocslib/jsonparser.py",
    "chars": 830,
    "preview": "import json\nfrom collections.abc import AsyncGenerator\nfrom typing import IO\n\nfrom .page import Page\nfrom .parser import"
  },
  {
    "path": "app/backend/prepdocslib/listfilestrategy.py",
    "chars": 4716,
    "preview": "import base64\nimport hashlib\nimport logging\nimport os\nimport re\nfrom abc import ABC\nfrom collections.abc import AsyncGen"
  },
  {
    "path": "app/backend/prepdocslib/mediadescriber.py",
    "chars": 7138,
    "preview": "import base64\nimport logging\nfrom abc import ABC\nfrom typing import Optional\n\nimport aiohttp\nfrom azure.core.credentials"
  },
  {
    "path": "app/backend/prepdocslib/page.py",
    "chars": 4379,
    "preview": "import base64\nfrom dataclasses import asdict, dataclass, field\nfrom typing import Any, Optional\n\n\n@dataclass\nclass Image"
  },
  {
    "path": "app/backend/prepdocslib/parser.py",
    "chars": 374,
    "preview": "from abc import ABC\nfrom collections.abc import AsyncGenerator\nfrom typing import IO\n\nfrom .page import Page\n\n\nclass Par"
  },
  {
    "path": "app/backend/prepdocslib/pdfparser.py",
    "chars": 12912,
    "preview": "import html\nimport io\nimport logging\nimport uuid\nfrom collections.abc import AsyncGenerator\nfrom enum import Enum\nfrom t"
  },
  {
    "path": "app/backend/prepdocslib/searchmanager.py",
    "chars": 34592,
    "preview": "import asyncio\nimport logging\nimport os\nfrom typing import Optional\n\nfrom azure.search.documents.indexes.models import ("
  },
  {
    "path": "app/backend/prepdocslib/servicesetup.py",
    "chars": 13569,
    "preview": "\"\"\"Shared service setup helpers.\"\"\"\n\nimport logging\nimport os\nfrom collections.abc import Awaitable, Callable\nfrom enum "
  },
  {
    "path": "app/backend/prepdocslib/strategy.py",
    "chars": 2493,
    "preview": "from abc import ABC\nfrom enum import Enum\nfrom typing import Optional\n\nfrom azure.core.credentials import AzureKeyCreden"
  },
  {
    "path": "app/backend/prepdocslib/textparser.py",
    "chars": 893,
    "preview": "import re\nfrom collections.abc import AsyncGenerator\nfrom typing import IO\n\nfrom .page import Page\nfrom .parser import P"
  },
  {
    "path": "app/backend/prepdocslib/textprocessor.py",
    "chars": 2043,
    "preview": "\"\"\"Utilities for processing document text and combining it with figure descriptions.\"\"\"\n\nimport logging\n\nfrom .figurepro"
  },
  {
    "path": "app/backend/prepdocslib/textsplitter.py",
    "chars": 26296,
    "preview": "import logging\nimport re\nfrom abc import ABC\nfrom collections.abc import Generator\nfrom dataclasses import dataclass, fi"
  },
  {
    "path": "app/backend/requirements.in",
    "chars": 598,
    "preview": "azure-functions>=1.24.0\nazure-identity\nquart\nquart-cors\nopenai>=1.109.1\ntiktoken\ntenacity\nazure-ai-documentintelligence="
  },
  {
    "path": "app/backend/requirements.txt",
    "chars": 11374,
    "preview": "# This file was autogenerated by uv via the following command:\n#    uv pip compile requirements.in -o requirements.txt -"
  },
  {
    "path": "app/backend/setup_cloud_ingestion.py",
    "chars": 8227,
    "preview": "\"\"\"Script to setup cloud ingestion for Azure AI Search.\"\"\"\n\nimport asyncio\nimport logging\nimport os\n\nfrom azure.core.cre"
  },
  {
    "path": "app/frontend/.npmrc",
    "chars": 228,
    "preview": "engine-strict=true\nfund=false\n# Required because react-helmet-async declares peer deps for @types/react <19.0.0,\n# but w"
  },
  {
    "path": "app/frontend/.nvmrc",
    "chars": 7,
    "preview": "22.0.0\n"
  },
  {
    "path": "app/frontend/.prettierignore",
    "chars": 24,
    "preview": "# Ignore JSON\n**/*.json\n"
  },
  {
    "path": "app/frontend/.prettierrc.json",
    "chars": 102,
    "preview": "{\n    \"tabWidth\": 4,\n    \"printWidth\": 160,\n    \"arrowParens\": \"avoid\",\n    \"trailingComma\": \"none\"\n}\n"
  },
  {
    "path": "app/frontend/index.html",
    "chars": 408,
    "preview": "<!doctype html>\n<html lang=\"en\">\n    <head>\n        <meta charset=\"UTF-8\" />\n        <link rel=\"icon\" type=\"image/x-icon"
  },
  {
    "path": "app/frontend/package.json",
    "chars": 1443,
    "preview": "{\n  \"name\": \"frontend\",\n  \"private\": true,\n  \"version\": \"0.0.0\",\n  \"type\": \"module\",\n  \"engines\": {\n    \"node\": \">=20.0."
  },
  {
    "path": "app/frontend/src/api/api.ts",
    "chars": 5913,
    "preview": "const BACKEND_URI = \"\";\n\nimport { ChatAppResponse, ChatAppResponseOrError, ChatAppRequest, Config, SimpleAPIResponse, Hi"
  },
  {
    "path": "app/frontend/src/api/index.ts",
    "chars": 49,
    "preview": "export * from \"./api\";\nexport * from \"./models\";\n"
  },
  {
    "path": "app/frontend/src/api/models.ts",
    "chars": 3936,
    "preview": "export const enum RetrievalMode {\n    Hybrid = \"hybrid\",\n    Vectors = \"vectors\",\n    Text = \"text\"\n}\n\nexport type ChatA"
  },
  {
    "path": "app/frontend/src/authConfig.ts",
    "chars": 10364,
    "preview": "// Refactored from https://github.com/Azure-Samples/ms-identity-javascript-react-tutorial/blob/main/1-Authentication/1-s"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/AgentPlan.tsx",
    "chars": 14046,
    "preview": "import React from \"react\";\nimport { Light as SyntaxHighlighter } from \"react-syntax-highlighter\";\nimport json from \"reac"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css",
    "chars": 4868,
    "preview": ".thoughtProcess {\n    font-family: source-code-pro, Menlo, Monaco, Consolas, \"Courier New\", monospace;\n    word-wrap: br"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx",
    "chars": 4538,
    "preview": "import { useMsal } from \"@azure/msal-react\";\nimport { Tab, TabList, SelectTabData, SelectTabEvent } from \"@fluentui/reac"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/AnalysisPanelTabs.tsx",
    "chars": 153,
    "preview": "export enum AnalysisPanelTabs {\n    ThoughtProcessTab = \"thoughtProcess\",\n    SupportingContentTab = \"supportingContent\""
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx",
    "chars": 3232,
    "preview": "import React from \"react\";\nimport { Light as SyntaxHighlighter } from \"react-syntax-highlighter\";\nimport json from \"reac"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/TokenUsageGraph.tsx",
    "chars": 7865,
    "preview": "import React from \"react\";\nimport styles from \"./AnalysisPanel.module.css\";\n\nexport interface TokenUsage {\n    prompt_to"
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/agentPlanUtils.ts",
    "chars": 1122,
    "preview": "export type QueryPlanStep = {\n    id: number | string;\n    type: string;\n    label?: string;\n    elapsed_ms?: number;\n  "
  },
  {
    "path": "app/frontend/src/components/AnalysisPanel/index.tsx",
    "chars": 70,
    "preview": "export * from \"./AnalysisPanel\";\nexport * from \"./AnalysisPanelTabs\";\n"
  },
  {
    "path": "app/frontend/src/components/Answer/Answer.module.css",
    "chars": 3118,
    "preview": ".answerContainer {\n    padding: 1.25em;\n    background: rgb(249, 249, 249);\n    border-radius: 0.75em;\n    box-shadow:\n "
  },
  {
    "path": "app/frontend/src/components/Answer/Answer.tsx",
    "chars": 8537,
    "preview": "import { useMemo, useState } from \"react\";\nimport { Button } from \"@fluentui/react-components\";\nimport { Copy24Regular, "
  },
  {
    "path": "app/frontend/src/components/Answer/AnswerError.tsx",
    "chars": 814,
    "preview": "import { Button } from \"@fluentui/react-components\";\nimport { ErrorCircle24Regular } from \"@fluentui/react-icons\";\n\nimpo"
  },
  {
    "path": "app/frontend/src/components/Answer/AnswerIcon.tsx",
    "chars": 210,
    "preview": "import { Sparkle28Filled } from \"@fluentui/react-icons\";\n\nexport const AnswerIcon = () => {\n    return <Sparkle28Filled "
  },
  {
    "path": "app/frontend/src/components/Answer/AnswerLoading.tsx",
    "chars": 925,
    "preview": "import { animated, useSpring } from \"@react-spring/web\";\nimport { useTranslation } from \"react-i18next\";\n\nimport styles "
  },
  {
    "path": "app/frontend/src/components/Answer/AnswerParser.tsx",
    "chars": 7951,
    "preview": "import { renderToStaticMarkup } from \"react-dom/server\";\nimport { ChatAppResponse, getCitationFilePath } from \"../../api"
  },
  {
    "path": "app/frontend/src/components/Answer/SpeechOutputAzure.tsx",
    "chars": 3251,
    "preview": "import { useState } from \"react\";\nimport { useTranslation } from \"react-i18next\";\nimport { Button } from \"@fluentui/reac"
  },
  {
    "path": "app/frontend/src/components/Answer/SpeechOutputBrowser.tsx",
    "chars": 2554,
    "preview": "import { useState } from \"react\";\nimport { Button } from \"@fluentui/react-components\";\nimport { Speaker224Regular } from"
  },
  {
    "path": "app/frontend/src/components/Answer/index.ts",
    "chars": 166,
    "preview": "export * from \"./Answer\";\nexport * from \"./AnswerLoading\";\nexport * from \"./AnswerError\";\nexport * from \"./SpeechOutputB"
  },
  {
    "path": "app/frontend/src/components/ClearChatButton/ClearChatButton.module.css",
    "chars": 119,
    "preview": ".container {\n    display: flex;\n    align-items: center;\n    gap: 0.375em;\n    cursor: pointer;\n    padding: 0.5rem;\n}\n"
  },
  {
    "path": "app/frontend/src/components/ClearChatButton/ClearChatButton.tsx",
    "chars": 671,
    "preview": "import { Delete24Regular } from \"@fluentui/react-icons\";\nimport { Button } from \"@fluentui/react-components\";\nimport { u"
  },
  {
    "path": "app/frontend/src/components/ClearChatButton/index.tsx",
    "chars": 35,
    "preview": "export * from \"./ClearChatButton\";\n"
  },
  {
    "path": "app/frontend/src/components/Example/Example.module.css",
    "chars": 1574,
    "preview": ".examplesNavList {\n    list-style: none;\n    padding-left: 1rem;\n    padding-right: 1rem;\n    display: flex;\n    flex-di"
  },
  {
    "path": "app/frontend/src/components/Example/Example.tsx",
    "chars": 369,
    "preview": "import styles from \"./Example.module.css\";\n\ninterface Props {\n    text: string;\n    value: string;\n    onClick: (value: "
  },
  {
    "path": "app/frontend/src/components/Example/ExampleList.tsx",
    "chars": 944,
    "preview": "import { Example } from \"./Example\";\nimport { useTranslation } from \"react-i18next\";\n\nimport styles from \"./Example.modu"
  },
  {
    "path": "app/frontend/src/components/Example/index.tsx",
    "chars": 58,
    "preview": "export * from \"./Example\";\nexport * from \"./ExampleList\";\n"
  },
  {
    "path": "app/frontend/src/components/HelpCallout/HelpCallout.tsx",
    "chars": 1990,
    "preview": "import { type JSX, useId, useState } from \"react\";\nimport { Button, Popover, PopoverTrigger, PopoverSurface } from \"@flu"
  },
  {
    "path": "app/frontend/src/components/HelpCallout/index.ts",
    "chars": 31,
    "preview": "export * from \"./HelpCallout\";\n"
  },
  {
    "path": "app/frontend/src/components/HistoryButton/HistoryButton.module.css",
    "chars": 119,
    "preview": ".container {\n    display: flex;\n    align-items: center;\n    gap: 0.375em;\n    cursor: pointer;\n    padding: 0.5rem;\n}\n"
  },
  {
    "path": "app/frontend/src/components/HistoryButton/HistoryButton.tsx",
    "chars": 677,
    "preview": "import { History24Regular } from \"@fluentui/react-icons\";\nimport { Button } from \"@fluentui/react-components\";\nimport { "
  },
  {
    "path": "app/frontend/src/components/HistoryButton/index.tsx",
    "chars": 33,
    "preview": "export * from \"./HistoryButton\";\n"
  },
  {
    "path": "app/frontend/src/components/HistoryItem/HistoryItem.module.css",
    "chars": 1954,
    "preview": ".historyItem {\n    display: flex;\n    align-items: center;\n    justify-content: space-between;\n    padding: 4px 8px;\n   "
  },
  {
    "path": "app/frontend/src/components/HistoryItem/HistoryItem.tsx",
    "chars": 2290,
    "preview": "import { useState, useCallback } from \"react\";\nimport { useTranslation } from \"react-i18next\";\nimport styles from \"./His"
  },
  {
    "path": "app/frontend/src/components/HistoryItem/index.tsx",
    "chars": 31,
    "preview": "export * from \"./HistoryItem\";\n"
  },
  {
    "path": "app/frontend/src/components/HistoryPanel/HistoryPanel.module.css",
    "chars": 214,
    "preview": ".group {\n    margin-top: 1rem;\n}\n.groupLabel {\n    font-size: 0.9rem;\n    font-weight: bold;\n    margin-top: 0.5rem;\n   "
  },
  {
    "path": "app/frontend/src/components/HistoryPanel/HistoryPanel.tsx",
    "chars": 6269,
    "preview": "import { useMsal } from \"@azure/msal-react\";\nimport { getToken, useLogin } from \"../../authConfig\";\nimport { OverlayDraw"
  },
  {
    "path": "app/frontend/src/components/HistoryPanel/index.tsx",
    "chars": 32,
    "preview": "export * from \"./HistoryPanel\";\n"
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/CosmosDB.ts",
    "chars": 1759,
    "preview": "import { IHistoryProvider, Answers, HistoryProviderOptions, HistoryMetaData } from \"./IProvider\";\nimport { deleteChatHis"
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/HistoryManager.ts",
    "chars": 877,
    "preview": "import { useMemo } from \"react\";\nimport { IHistoryProvider, HistoryProviderOptions } from \"../HistoryProviders/IProvider"
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/IProvider.ts",
    "chars": 719,
    "preview": "import { ChatAppResponse } from \"../../api\";\n\nexport type HistoryMetaData = { id: string; title: string; timestamp: numb"
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/IndexedDB.ts",
    "chars": 3681,
    "preview": "import { IDBPDatabase, openDB } from \"idb\";\nimport { IHistoryProvider, Answers, HistoryProviderOptions, HistoryMetaData "
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/None.ts",
    "chars": 599,
    "preview": "import { IHistoryProvider, Answers, HistoryProviderOptions, HistoryMetaData } from \"./IProvider\";\n\nexport class NoneProv"
  },
  {
    "path": "app/frontend/src/components/HistoryProviders/index.ts",
    "chars": 116,
    "preview": "export * from \"./HistoryManager\";\nexport * from \"./IndexedDB\";\nexport * from \"./IProvider\";\nexport * from \"./None\";\n"
  },
  {
    "path": "app/frontend/src/components/LoginButton/LoginButton.module.css",
    "chars": 109,
    "preview": ".loginButton {\n    border-radius: 0.3125em;\n    font-weight: 100;\n    margin: 0;\n    padding: 0.5rem 1rem;\n}\n"
  },
  {
    "path": "app/frontend/src/components/LoginButton/LoginButton.tsx",
    "chars": 2537,
    "preview": "import { Button } from \"@fluentui/react-components\";\nimport { useMsal } from \"@azure/msal-react\";\nimport { useTranslatio"
  },
  {
    "path": "app/frontend/src/components/LoginButton/index.tsx",
    "chars": 31,
    "preview": "export * from \"./LoginButton\";\n"
  },
  {
    "path": "app/frontend/src/components/MarkdownViewer/MarkdownViewer.module.css",
    "chars": 701,
    "preview": ".downloadButton {\n    position: relative;\n    float: right;\n}\n\n.markdownViewer {\n    border-radius: 0.5em;\n    box-shado"
  },
  {
    "path": "app/frontend/src/components/MarkdownViewer/MarkdownViewer.tsx",
    "chars": 3054,
    "preview": "import { Spinner, MessageBar, MessageBarBody, Link, Button } from \"@fluentui/react-components\";\nimport { Save24Regular }"
  },
  {
    "path": "app/frontend/src/components/MarkdownViewer/index.tsx",
    "chars": 34,
    "preview": "export * from \"./MarkdownViewer\";\n"
  },
  {
    "path": "app/frontend/src/components/QuestionInput/QuestionInput.module.css",
    "chars": 956,
    "preview": ".questionInputContainer {\n    border-radius: 0.75rem;\n    box-shadow:\n        0 2px 8px rgba(0, 0, 0, 0.08),\n        0 0"
  },
  {
    "path": "app/frontend/src/components/QuestionInput/QuestionInput.tsx",
    "chars": 4679,
    "preview": "import { useState, useEffect, useContext, useCallback, useRef } from \"react\";\nimport { Button, Textarea, Tooltip } from "
  },
  {
    "path": "app/frontend/src/components/QuestionInput/SpeechInput.tsx",
    "chars": 4393,
    "preview": "import { SetStateAction, useState } from \"react\";\nimport { Button, Tooltip } from \"@fluentui/react-components\";\nimport {"
  },
  {
    "path": "app/frontend/src/components/QuestionInput/index.ts",
    "chars": 33,
    "preview": "export * from \"./QuestionInput\";\n"
  },
  {
    "path": "app/frontend/src/components/Settings/Settings.module.css",
    "chars": 1254,
    "preview": ".settingsSeparator {\n    margin-top: 0.75rem;\n}\n\n.settingsField {\n    display: flex;\n    flex-direction: column;\n    gap"
  },
  {
    "path": "app/frontend/src/components/Settings/Settings.tsx",
    "chars": 26285,
    "preview": "import { useId } from \"react\";\nimport { useTranslation } from \"react-i18next\";\nimport { Input, Textarea, Checkbox, Dropd"
  },
  {
    "path": "app/frontend/src/components/SettingsButton/SettingsButton.module.css",
    "chars": 119,
    "preview": ".container {\n    display: flex;\n    align-items: center;\n    gap: 0.375em;\n    cursor: pointer;\n    padding: 0.5rem;\n}\n"
  },
  {
    "path": "app/frontend/src/components/SettingsButton/SettingsButton.tsx",
    "chars": 621,
    "preview": "import { Settings24Regular } from \"@fluentui/react-icons\";\nimport { Button } from \"@fluentui/react-components\";\nimport {"
  },
  {
    "path": "app/frontend/src/components/SettingsButton/index.tsx",
    "chars": 34,
    "preview": "export * from \"./SettingsButton\";\n"
  },
  {
    "path": "app/frontend/src/components/SupportingContent/SupportingContent.module.css",
    "chars": 772,
    "preview": ".supportingContentNavList {\n    list-style: none;\n    padding-left: 0.3125em;\n    display: flex;\n    flex-direction: col"
  },
  {
    "path": "app/frontend/src/components/SupportingContent/SupportingContent.tsx",
    "chars": 2133,
    "preview": "import DOMPurify from \"dompurify\";\n\nimport { DataPoints } from \"../../api\";\nimport { parseSupportingContentItem } from \""
  },
  {
    "path": "app/frontend/src/components/SupportingContent/SupportingContentParser.ts",
    "chars": 542,
    "preview": "import DOMPurify from \"dompurify\";\n\ntype ParsedSupportingContentItem = {\n    title: string;\n    content: string;\n};\n\nexp"
  },
  {
    "path": "app/frontend/src/components/SupportingContent/index.ts",
    "chars": 37,
    "preview": "export * from \"./SupportingContent\";\n"
  },
  {
    "path": "app/frontend/src/components/TokenClaimsDisplay/TokenClaimsDisplay.tsx",
    "chars": 3165,
    "preview": "import { Label } from \"@fluentui/react-components\";\nimport { useMsal } from \"@azure/msal-react\";\nimport {\n    DataGridBo"
  },
  {
    "path": "app/frontend/src/components/TokenClaimsDisplay/index.tsx",
    "chars": 38,
    "preview": "export * from \"./TokenClaimsDisplay\";\n"
  },
  {
    "path": "app/frontend/src/components/UploadFile/UploadFile.module.css",
    "chars": 394,
    "preview": ".container {\n    display: flex;\n    align-items: center;\n    gap: 0.375em;\n    cursor: pointer;\n    padding: 0.5rem;\n}\n\n"
  },
  {
    "path": "app/frontend/src/components/UploadFile/UploadFile.tsx",
    "chars": 7104,
    "preview": "import React, { useState, ChangeEvent } from \"react\";\nimport { Button, Popover, PopoverTrigger, PopoverSurface, Label, T"
  },
  {
    "path": "app/frontend/src/components/UploadFile/index.tsx",
    "chars": 30,
    "preview": "export * from \"./UploadFile\";\n"
  },
  {
    "path": "app/frontend/src/components/UserChatMessage/UserChatMessage.module.css",
    "chars": 355,
    "preview": ".container {\n    display: flex;\n    justify-content: flex-end;\n    margin-bottom: 1.25em;\n    max-width: 80%;\n    margin"
  },
  {
    "path": "app/frontend/src/components/UserChatMessage/UserChatMessage.tsx",
    "chars": 292,
    "preview": "import styles from \"./UserChatMessage.module.css\";\n\ninterface Props {\n    message: string;\n}\n\nexport const UserChatMessa"
  },
  {
    "path": "app/frontend/src/components/UserChatMessage/index.ts",
    "chars": 35,
    "preview": "export * from \"./UserChatMessage\";\n"
  },
  {
    "path": "app/frontend/src/components/VectorSettings/VectorSettings.module.css",
    "chars": 273,
    "preview": ".container {\n    margin-top: 0.625em;\n}\n\n.fieldset {\n    border: none;\n    padding: 0;\n}\n\n.legend {\n    font-size: 14px;"
  },
  {
    "path": "app/frontend/src/components/VectorSettings/VectorSettings.tsx",
    "chars": 6382,
    "preview": "import { useEffect, useId, useState } from \"react\";\nimport { Dropdown, Option, Checkbox } from \"@fluentui/react-componen"
  },
  {
    "path": "app/frontend/src/components/VectorSettings/index.ts",
    "chars": 34,
    "preview": "export * from \"./VectorSettings\";\n"
  },
  {
    "path": "app/frontend/src/i18n/LanguagePicker.module.css",
    "chars": 527,
    "preview": ".languagePicker {\n    display: flex;\n    justify-content: center;\n    align-items: center;\n    gap: 0.25rem;\n    padding"
  },
  {
    "path": "app/frontend/src/i18n/LanguagePicker.tsx",
    "chars": 1495,
    "preview": "import { useTranslation } from \"react-i18next\";\nimport { LocalLanguage24Regular } from \"@fluentui/react-icons\";\nimport {"
  },
  {
    "path": "app/frontend/src/i18n/config.ts",
    "chars": 2528,
    "preview": "import i18next from \"i18next\";\nimport LanguageDetector from \"i18next-browser-languagedetector\";\nimport HttpApi from \"i18"
  },
  {
    "path": "app/frontend/src/i18n/index.tsx",
    "chars": 34,
    "preview": "export * from \"./LanguagePicker\";\n"
  },
  {
    "path": "app/frontend/src/index.css",
    "chars": 1044,
    "preview": "* {\n    box-sizing: border-box;\n}\n\nhtml {\n    height: 100%;\n    background: #f2f2f2;\n    font-size: 12px; /* Root font s"
  },
  {
    "path": "app/frontend/src/index.tsx",
    "chars": 2853,
    "preview": "import React from \"react\";\nimport ReactDOM from \"react-dom/client\";\nimport { createHashRouter, RouterProvider } from \"re"
  },
  {
    "path": "app/frontend/src/layoutWrapper.tsx",
    "chars": 1811,
    "preview": "import { useEffect, useRef, useState } from \"react\";\nimport { FluentProvider, webLightTheme } from \"@fluentui/react-comp"
  },
  {
    "path": "app/frontend/src/locales/da/translation.json",
    "chars": 9683,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/en/translation.json",
    "chars": 9472,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/es/translation.json",
    "chars": 11002,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/fr/translation.json",
    "chars": 11229,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/it/translation.json",
    "chars": 10659,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/ja/translation.json",
    "chars": 7156,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"チャット\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/nl/translation.json",
    "chars": 10386,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/pl/translation.json",
    "chars": 10130,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + Wyszukiwanie AI\",\n    \"headerTitle\": \"Azure OpenAI + Wyszukiwanie AI\",\n    \"chat\": \"C"
  },
  {
    "path": "app/frontend/src/locales/ptBR/translation.json",
    "chars": 10455,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Search\",\n    \"headerTitle\": \"Azure OpenAI + AI Search\",\n    \"chat\": \"Chat\",\n    \"l"
  },
  {
    "path": "app/frontend/src/locales/tr/translation.json",
    "chars": 9892,
    "preview": "{\n    \"pageTitle\": \"Azure OpenAI + AI Arama\",\n    \"headerTitle\": \"Azure OpenAI + AI Arama\",\n    \"chat\": \"Sohbet\",\n    \"l"
  },
  {
    "path": "app/frontend/src/loginContext.tsx",
    "chars": 487,
    "preview": "/**\n * This file defines a context for managing login state in a React application.\n * Context provides a way to pass da"
  },
  {
    "path": "app/frontend/src/pages/NoPage.tsx",
    "chars": 140,
    "preview": "import { type JSX } from \"react\";\n\nexport function Component(): JSX.Element {\n    return <h1>404</h1>;\n}\n\nComponent.disp"
  },
  {
    "path": "app/frontend/src/pages/chat/Chat.module.css",
    "chars": 2318,
    "preview": ".container {\n    flex: 1;\n    display: flex;\n    flex-direction: column;\n    margin-top: 1rem;\n}\n\n.chatRoot {\n    flex: "
  },
  {
    "path": "app/frontend/src/pages/chat/Chat.tsx",
    "chars": 38146,
    "preview": "import { useRef, useState, useEffect, useContext } from \"react\";\nimport { useTranslation } from \"react-i18next\";\nimport "
  },
  {
    "path": "app/frontend/src/pages/layout/Layout.module.css",
    "chars": 2618,
    "preview": ".layout {\n    display: flex;\n    flex-direction: column;\n    height: 100%;\n}\n\n.main {\n    display: flex;\n    flex: 1;\n  "
  },
  {
    "path": "app/frontend/src/pages/layout/Layout.tsx",
    "chars": 967,
    "preview": "import { Outlet, Link } from \"react-router-dom\";\nimport { useTranslation } from \"react-i18next\";\nimport styles from \"./L"
  },
  {
    "path": "app/frontend/src/vite-env.d.ts",
    "chars": 38,
    "preview": "/// <reference types=\"vite/client\" />\n"
  },
  {
    "path": "app/frontend/tsconfig.json",
    "chars": 535,
    "preview": "{\n  \"compilerOptions\": {\n    \"target\": \"ESNext\",\n    \"useDefineForClassFields\": true,\n    \"lib\": [\"DOM\", \"DOM.Iterable\","
  },
  {
    "path": "app/frontend/vite.config.ts",
    "chars": 1399,
    "preview": "import { defineConfig } from \"vite\";\nimport react from \"@vitejs/plugin-react\";\n\n// https://vitejs.dev/config/\nexport def"
  },
  {
    "path": "app/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "app/functions/document_extractor/.funcignore",
    "chars": 109,
    "preview": ".git*\n.vscode\n__pycache__\n*.pyc\n.python_packages\n.venv\nlocal.settings.json\ntest\n.pytest_cache\n.coverage\n*.md\n"
  },
  {
    "path": "app/functions/document_extractor/function_app.py",
    "chars": 14079,
    "preview": "\"\"\"\nAzure Function: Document Extractor\nCustom skill for Azure AI Search that extracts and processes document content.\n\"\""
  },
  {
    "path": "app/functions/document_extractor/host.json",
    "chars": 536,
    "preview": "{\n  \"version\": \"2.0\",\n  \"extensions\": {\n    \"mcp\": {\n      \"system\": {\n        \"webhookAuthorizationLevel\": \"anonymous\"\n"
  },
  {
    "path": "app/functions/figure_processor/.funcignore",
    "chars": 109,
    "preview": ".git*\n.vscode\n__pycache__\n*.pyc\n.python_packages\n.venv\nlocal.settings.json\ntest\n.pytest_cache\n.coverage\n*.md\n"
  },
  {
    "path": "app/functions/figure_processor/function_app.py",
    "chars": 7529,
    "preview": "\"\"\"\nAzure Function: Figure Processor\nCustom skill for Azure AI Search that enriches figure payloads emitted by the docum"
  },
  {
    "path": "app/functions/figure_processor/host.json",
    "chars": 536,
    "preview": "{\n  \"version\": \"2.0\",\n  \"extensions\": {\n    \"mcp\": {\n      \"system\": {\n        \"webhookAuthorizationLevel\": \"anonymous\"\n"
  },
  {
    "path": "app/functions/text_processor/.funcignore",
    "chars": 109,
    "preview": ".git*\n.vscode\n__pycache__\n*.pyc\n.python_packages\n.venv\nlocal.settings.json\ntest\n.pytest_cache\n.coverage\n*.md\n"
  },
  {
    "path": "app/functions/text_processor/function_app.py",
    "chars": 12293,
    "preview": "\"\"\"Azure Function: Text Processor.\nCustom skill for Azure AI Search that merges page text with figure metadata, splits i"
  },
  {
    "path": "app/functions/text_processor/host.json",
    "chars": 536,
    "preview": "{\n  \"version\": \"2.0\",\n  \"extensions\": {\n    \"mcp\": {\n      \"system\": {\n        \"webhookAuthorizationLevel\": \"anonymous\"\n"
  },
  {
    "path": "app/start.ps1",
    "chars": 2110,
    "preview": "# set the parent of the script as the current location.\nSet-Location $PSScriptRoot\n\nWrite-Host \"\"\nWrite-Host \"Loading az"
  }
]

// ... and 271 more files (download for full content)

About this extraction

This page contains the full source code of the Azure-Samples/azure-search-openai-demo GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 471 files (33.7 MB), approximately 1.5M tokens, and a symbol index with 1056 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!