Full Code of SciPhi-AI/R2R for AI

main 9c5a94d151f9 cached

501 files

4.5 MB

1.2M tokens

2934 symbols

1 request

Download .txt

Showing preview only (4,809K chars total). Download the full file or copy to clipboard to get everything.

Repository: SciPhi-AI/R2R
Branch: main
Commit: 9c5a94d151f9
Files: 501
Total size: 4.5 MB

Directory structure:
gitextract_7stu15in/

├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── custom.md
│   │   └── feature_request.md
│   ├── actions/
│   │   ├── login-docker/
│   │   │   └── action.yml
│   │   ├── setup-docker/
│   │   │   └── action.yml
│   │   ├── setup-postgres-ext/
│   │   │   └── action.yml
│   │   ├── setup-python-full/
│   │   │   └── action.yml
│   │   ├── setup-python-light/
│   │   │   └── action.yml
│   │   ├── start-r2r-full/
│   │   │   └── action.yml
│   │   └── start-r2r-light/
│   │       └── action.yml
│   └── workflows/
│       ├── build-cluster-service-docker.yml
│       ├── build-r2r-docker.yml
│       ├── build-unst-service-docker.yml
│       ├── publish-to-npm.yml
│       ├── publish-to-pypi.yml
│       ├── quality.yml
│       ├── r2r-full-py-integration-tests.yml
│       ├── r2r-js-sdk-ci.yml
│       ├── r2r-js-sdk-integration-tests.yml
│       └── r2r-light-py-integration-tests.yml
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.md
├── MANIFEST.md
├── SECURITY.md
├── deployment/
│   └── k8s/
│       ├── kustomizations/
│       │   ├── helm-values_hatchet.yaml
│       │   ├── helm-values_postgresql.yaml
│       │   ├── include/
│       │   │   ├── cm-hatchet.yaml
│       │   │   ├── cm-hatchet_OLD.yaml
│       │   │   ├── cm-init-scripts-hatchet.yaml
│       │   │   ├── cm-init-scripts-r2r.yaml
│       │   │   ├── cm-r2r.yaml
│       │   │   ├── cm-unstructured.yaml
│       │   │   ├── hatchet-dashboard-initc.yaml
│       │   │   ├── hatchet-engine-initc.yaml
│       │   │   ├── hatchet-init-job.yaml
│       │   │   ├── hatchet-rabbitmq-sts.yaml
│       │   │   ├── pgadmin.yaml
│       │   │   ├── pgvector-sts.yaml
│       │   │   ├── r2r-dashboard-indep.yaml
│       │   │   ├── r2r-graph-clustering-indep.yaml
│       │   │   ├── r2r-initc.yaml
│       │   │   ├── r2r-nginx-indep.yaml
│       │   │   └── unstructured-indep.yaml
│       │   ├── kustomization.yaml
│       │   └── patches/
│       │       ├── hatchet-rabbitmq-sts.yaml
│       │       ├── rm-secret-hatchet-postgres.yaml
│       │       ├── rm-secret-hatchet-rabbitmq-config.yaml
│       │       ├── rm-secret-hatchet-rabbitmq.yaml
│       │       ├── rm-secret-hatchet-shared-config.yaml
│       │       └── service.yaml
│       └── manifests/
│           └── examples/
│               ├── externalsecret_hatchet.yaml
│               ├── externalsecret_r2r.yaml
│               ├── ingress-r2r.yaml
│               ├── secrets_hatchet.yaml
│               └── secrets_r2r.yaml
├── docker/
│   ├── compose.full.swarm.yaml
│   ├── compose.full.yaml
│   ├── compose.yaml
│   ├── env/
│   │   ├── hatchet.env
│   │   ├── minio.env
│   │   ├── postgres.env
│   │   ├── r2r-dashboard.env
│   │   ├── r2r-full.env
│   │   └── r2r.env
│   ├── fluent-bit/
│   │   ├── fluent-bit.conf
│   │   └── parsers.conf
│   ├── scripts/
│   │   ├── create-hatchet-db.sh
│   │   ├── setup-token.sh
│   │   └── start-r2r.sh
│   ├── user_configs/
│   │   └── README.md
│   └── user_tools/
│       ├── README.md
│       └── user_requirements.txt
├── docs/
│   ├── README.md
│   ├── cookbooks/
│   │   ├── application.md
│   │   ├── custom-tools.md
│   │   ├── email.md
│   │   ├── evals.md
│   │   ├── graphs.md
│   │   ├── ingestion.md
│   │   ├── local.md
│   │   ├── logging.md
│   │   ├── maintenance.md
│   │   ├── mcp.md
│   │   ├── orchestration.md
│   │   ├── structured-output.md
│   │   ├── web-dev.md
│   │   └── {README.md}
│   ├── documentation/
│   │   ├── README.md
│   │   ├── advanced/
│   │   │   ├── contextual-enrichment.md
│   │   │   └── deduplication.md
│   │   ├── general/
│   │   │   ├── collections.md
│   │   │   ├── conversations.md
│   │   │   ├── documents.md
│   │   │   ├── graphs.md
│   │   │   ├── prompts.md
│   │   │   └── users.md
│   │   └── retrieval/
│   │       ├── advanced-rag.md
│   │       ├── agentic-rag.md
│   │       ├── hybrid-search.md
│   │       └── search-and-rag.md
│   └── introduction/
│       ├── guides/
│       │   ├── rag.md
│       │   └── what-is-r2r.md
│       └── system.md
├── js/
│   ├── README.md
│   └── sdk/
│       ├── .prettierignore
│       ├── README.md
│       ├── __tests__/
│       │   ├── ChunksIntegrationSuperUser.test.ts
│       │   ├── CollectionsIntegrationSuperUser.test.ts
│       │   ├── ConversationsIntegrationSuperUser.test.ts
│       │   ├── ConversationsIntegrationUser.test.ts
│       │   ├── DocumentsAndCollectionsIntegrationUser.test.ts
│       │   ├── DocumentsIntegrationSuperUser.test.ts
│       │   ├── GraphsIntegrationSuperUser.test.ts
│       │   ├── PromptsIntegrationSuperUser.test.ts
│       │   ├── RetrievalIntegrationSuperUser.test.ts
│       │   ├── SystemIntegrationSuperUser.test.ts
│       │   ├── SystemIntegrationUser.test.ts
│       │   ├── UsersIntegrationSuperUser.test.ts
│       │   └── util/
│       │       └── typeTransformer.test.ts
│       ├── examples/
│       │   └── data/
│       │       ├── folder/
│       │       │   ├── karamozov.txt
│       │       │   └── myshkin.txt
│       │       ├── invalid.json
│       │       ├── marmeladov.txt
│       │       ├── raskolnikov.txt
│       │       ├── raskolnikov_2.txt
│       │       ├── sonia.txt
│       │       └── zametov.txt
│       ├── package.json
│       ├── src/
│       │   ├── baseClient.ts
│       │   ├── index.ts
│       │   ├── r2rClient.ts
│       │   ├── types.ts
│       │   ├── utils/
│       │   │   ├── index.ts
│       │   │   ├── typeTransformer.ts
│       │   │   └── utils.ts
│       │   └── v3/
│       │       └── clients/
│       │           ├── chunks.ts
│       │           ├── collections.ts
│       │           ├── conversations.ts
│       │           ├── documents.ts
│       │           ├── graphs.ts
│       │           ├── indices.ts
│       │           ├── prompts.ts
│       │           ├── retrieval.ts
│       │           ├── system.ts
│       │           └── users.ts
│       └── tsconfig.json
├── llms.txt
├── py/
│   ├── .dockerignore
│   ├── Dockerfile
│   ├── README.md
│   ├── all_possible_config.toml
│   ├── core/
│   │   ├── __init__.py
│   │   ├── agent/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── rag.py
│   │   │   └── research.py
│   │   ├── base/
│   │   │   ├── __init__.py
│   │   │   ├── abstractions/
│   │   │   │   └── __init__.py
│   │   │   ├── agent/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── agent.py
│   │   │   │   └── tools/
│   │   │   │       ├── built_in/
│   │   │   │       │   ├── get_file_content.py
│   │   │   │       │   ├── search_file_descriptions.py
│   │   │   │       │   ├── search_file_knowledge.py
│   │   │   │       │   ├── tavily_extract.py
│   │   │   │       │   ├── tavily_search.py
│   │   │   │       │   ├── web_scrape.py
│   │   │   │       │   └── web_search.py
│   │   │   │       └── registry.py
│   │   │   ├── api/
│   │   │   │   └── models/
│   │   │   │       └── __init__.py
│   │   │   ├── parsers/
│   │   │   │   ├── __init__.py
│   │   │   │   └── base_parser.py
│   │   │   ├── providers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── auth.py
│   │   │   │   ├── base.py
│   │   │   │   ├── crypto.py
│   │   │   │   ├── database.py
│   │   │   │   ├── email.py
│   │   │   │   ├── embedding.py
│   │   │   │   ├── file.py
│   │   │   │   ├── ingestion.py
│   │   │   │   ├── llm.py
│   │   │   │   ├── ocr.py
│   │   │   │   ├── orchestration.py
│   │   │   │   └── scheduler.py
│   │   │   └── utils/
│   │   │       └── __init__.py
│   │   ├── configs/
│   │   │   ├── full.toml
│   │   │   ├── full_azure.toml
│   │   │   ├── full_lm_studio.toml
│   │   │   ├── full_ollama.toml
│   │   │   ├── gemini.toml
│   │   │   ├── lm_studio.toml
│   │   │   ├── ollama.toml
│   │   │   ├── r2r_azure.toml
│   │   │   ├── r2r_azure_with_test_limits.toml
│   │   │   ├── r2r_with_auth.toml
│   │   │   └── tavily.toml
│   │   ├── examples/
│   │   │   ├── __init__.py
│   │   │   ├── data/
│   │   │   │   ├── aristotle.txt
│   │   │   │   ├── aristotle_v2.txt
│   │   │   │   ├── aristotle_v3.txt
│   │   │   │   ├── got.txt
│   │   │   │   ├── pg_essay_1.html
│   │   │   │   ├── pg_essay_2.html
│   │   │   │   ├── pg_essay_3.html
│   │   │   │   ├── pg_essay_4.html
│   │   │   │   ├── pg_essay_5.html
│   │   │   │   ├── test.txt
│   │   │   │   └── yc_companies.txt
│   │   │   ├── hello_r2r.ipynb
│   │   │   ├── hello_r2r.py
│   │   │   └── supported_file_types/
│   │   │       ├── css.css
│   │   │       ├── csv.csv
│   │   │       ├── doc.doc
│   │   │       ├── docx.docx
│   │   │       ├── eml.eml
│   │   │       ├── epub.epub
│   │   │       ├── heic.heic
│   │   │       ├── html.html
│   │   │       ├── js.js
│   │   │       ├── json.json
│   │   │       ├── md.md
│   │   │       ├── msg.msg
│   │   │       ├── odt.odt
│   │   │       ├── org.org
│   │   │       ├── p7s.p7s
│   │   │       ├── ppt.ppt
│   │   │       ├── pptx.pptx
│   │   │       ├── py.py
│   │   │       ├── rst.rst
│   │   │       ├── rtf.rtf
│   │   │       ├── tiff.tiff
│   │   │       ├── ts.ts
│   │   │       ├── tsv.tsv
│   │   │       ├── txt.txt
│   │   │       ├── xls.xls
│   │   │       └── xlsx.xlsx
│   │   ├── main/
│   │   │   ├── __init__.py
│   │   │   ├── abstractions.py
│   │   │   ├── api/
│   │   │   │   └── v3/
│   │   │   │       ├── base_router.py
│   │   │   │       ├── chunks_router.py
│   │   │   │       ├── collections_router.py
│   │   │   │       ├── conversations_router.py
│   │   │   │       ├── documents_router.py
│   │   │   │       ├── graph_router.py
│   │   │   │       ├── indices_router.py
│   │   │   │       ├── prompts_router.py
│   │   │   │       ├── retrieval_router.py
│   │   │   │       ├── system_router.py
│   │   │   │       └── users_router.py
│   │   │   ├── app.py
│   │   │   ├── app_entry.py
│   │   │   ├── assembly/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── builder.py
│   │   │   │   ├── factory.py
│   │   │   │   └── utils.py
│   │   │   ├── config.py
│   │   │   ├── middleware/
│   │   │   │   ├── __init__.py
│   │   │   │   └── project_schema.py
│   │   │   ├── orchestration/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hatchet/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── graph_workflow.py
│   │   │   │   │   └── ingestion_workflow.py
│   │   │   │   └── simple/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── graph_workflow.py
│   │   │   │       └── ingestion_workflow.py
│   │   │   └── services/
│   │   │       ├── __init__.py
│   │   │       ├── auth_service.py
│   │   │       ├── base.py
│   │   │       ├── graph_service.py
│   │   │       ├── ingestion_service.py
│   │   │       ├── maintenance_service.py
│   │   │       ├── management_service.py
│   │   │       └── retrieval_service.py
│   │   ├── parsers/
│   │   │   ├── __init__.py
│   │   │   ├── media/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_parser.py
│   │   │   │   ├── bmp_parser.py
│   │   │   │   ├── doc_parser.py
│   │   │   │   ├── docx_parser.py
│   │   │   │   ├── img_parser.py
│   │   │   │   ├── odt_parser.py
│   │   │   │   ├── pdf_parser.py
│   │   │   │   ├── ppt_parser.py
│   │   │   │   ├── pptx_parser.py
│   │   │   │   └── rtf_parser.py
│   │   │   ├── structured/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── csv_parser.py
│   │   │   │   ├── eml_parser.py
│   │   │   │   ├── epub_parser.py
│   │   │   │   ├── json_parser.py
│   │   │   │   ├── msg_parser.py
│   │   │   │   ├── org_parser.py
│   │   │   │   ├── p7s_parser.py
│   │   │   │   ├── rst_parser.py
│   │   │   │   ├── tsv_parser.py
│   │   │   │   ├── xls_parser.py
│   │   │   │   └── xlsx_parser.py
│   │   │   └── text/
│   │   │       ├── __init__.py
│   │   │       ├── css_parser.py
│   │   │       ├── html_parser.py
│   │   │       ├── js_parser.py
│   │   │       ├── md_parser.py
│   │   │       ├── python_parser.py
│   │   │       ├── text_parser.py
│   │   │       └── ts_parser.py
│   │   ├── providers/
│   │   │   ├── __init__.py
│   │   │   ├── auth/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clerk.py
│   │   │   │   ├── jwt.py
│   │   │   │   ├── r2r_auth.py
│   │   │   │   └── supabase.py
│   │   │   ├── crypto/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bcrypt.py
│   │   │   │   └── nacl.py
│   │   │   ├── database/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── chunks.py
│   │   │   │   ├── collections.py
│   │   │   │   ├── conversations.py
│   │   │   │   ├── documents.py
│   │   │   │   ├── filters.py
│   │   │   │   ├── graphs.py
│   │   │   │   ├── limits.py
│   │   │   │   ├── maintenance.py
│   │   │   │   ├── postgres.py
│   │   │   │   ├── prompts/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── chunk_enrichment.yaml
│   │   │   │   │   ├── collection_summary.yaml
│   │   │   │   │   ├── dynamic_rag_agent.yaml
│   │   │   │   │   ├── dynamic_rag_agent_xml_tooling.yaml
│   │   │   │   │   ├── graph_communities.yaml
│   │   │   │   │   ├── graph_entity_description.yaml
│   │   │   │   │   ├── graph_extraction.yaml
│   │   │   │   │   ├── hyde.yaml
│   │   │   │   │   ├── rag.yaml
│   │   │   │   │   ├── rag_fusion.yaml
│   │   │   │   │   ├── static_rag_agent.yaml
│   │   │   │   │   ├── static_research_agent.yaml
│   │   │   │   │   ├── summary.yaml
│   │   │   │   │   ├── system.yaml
│   │   │   │   │   ├── vision_img.yaml
│   │   │   │   │   └── vision_pdf.yaml
│   │   │   │   ├── prompts_handler.py
│   │   │   │   ├── tokens.py
│   │   │   │   ├── users.py
│   │   │   │   └── utils.py
│   │   │   ├── email/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── console_mock.py
│   │   │   │   ├── mailersend.py
│   │   │   │   ├── sendgrid.py
│   │   │   │   └── smtp.py
│   │   │   ├── embeddings/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── litellm.py
│   │   │   │   ├── ollama.py
│   │   │   │   ├── openai.py
│   │   │   │   └── utils.py
│   │   │   ├── file/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── postgres.py
│   │   │   │   └── s3.py
│   │   │   ├── ingestion/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── r2r/
│   │   │   │   │   └── base.py
│   │   │   │   └── unstructured/
│   │   │   │       └── base.py
│   │   │   ├── llm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── anthropic.py
│   │   │   │   ├── azure_foundry.py
│   │   │   │   ├── litellm.py
│   │   │   │   ├── openai.py
│   │   │   │   ├── r2r_llm.py
│   │   │   │   └── utils.py
│   │   │   ├── ocr/
│   │   │   │   ├── __init__.py
│   │   │   │   └── mistral.py
│   │   │   ├── orchestration/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hatchet.py
│   │   │   │   └── simple.py
│   │   │   └── scheduler/
│   │   │       ├── __init__.py
│   │   │       └── apscheduler.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── context.py
│   │       ├── logging_config.py
│   │       ├── sentry.py
│   │       └── serper.py
│   ├── migrations/
│   │   ├── README
│   │   ├── alembic.ini
│   │   ├── env.py
│   │   ├── script.py.mako
│   │   └── versions/
│   │       ├── 2fac23e4d91b_migrate_to_document_search.py
│   │       ├── 3efc7b3b1b3d_add_total_tokens_count.py
│   │       ├── 7eb70560f406_add_limits_overrides_to_users.py
│   │       ├── 8077140e1e99_v3_api_database_revision.py
│   │       ├── c45a9cf6a8a4_add_user_and_document_count_to_.py
│   │       └── d342e632358a_migrate_to_asyncpg.py
│   ├── pyproject.toml
│   ├── r2r/
│   │   ├── __init__.py
│   │   ├── mcp.py
│   │   ├── r2r.toml
│   │   └── serve.py
│   ├── sdk/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── asnyc_methods/
│   │   │   ├── __init__.py
│   │   │   ├── chunks.py
│   │   │   ├── collections.py
│   │   │   ├── conversations.py
│   │   │   ├── documents.py
│   │   │   ├── graphs.py
│   │   │   ├── indices.py
│   │   │   ├── prompts.py
│   │   │   ├── retrieval.py
│   │   │   ├── system.py
│   │   │   └── users.py
│   │   ├── async_client.py
│   │   ├── base/
│   │   │   ├── __init_.py
│   │   │   └── base_client.py
│   │   ├── models.py
│   │   ├── sync_client.py
│   │   └── sync_methods/
│   │       ├── __init__.py
│   │       ├── chunks.py
│   │       ├── collections.py
│   │       ├── conversations.py
│   │       ├── documents.py
│   │       ├── graphs.py
│   │       ├── indices.py
│   │       ├── prompts.py
│   │       ├── retrieval.py
│   │       ├── system.py
│   │       └── users.py
│   ├── shared/
│   │   ├── __init__.py
│   │   ├── abstractions/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── document.py
│   │   │   ├── exception.py
│   │   │   ├── graph.py
│   │   │   ├── llm.py
│   │   │   ├── prompt.py
│   │   │   ├── search.py
│   │   │   ├── tool.py
│   │   │   ├── user.py
│   │   │   └── vector.py
│   │   ├── api/
│   │   │   └── models/
│   │   │       ├── __init__.py
│   │   │       ├── auth/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── base.py
│   │   │       ├── graph/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── ingestion/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── management/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       └── retrieval/
│   │   │           ├── __init__.py
│   │   │           └── responses.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── base_utils.py
│   │       └── splitter/
│   │           ├── __init__.py
│   │           └── text.py
│   └── tests/
│       ├── integration/
│       │   ├── conftest.py
│       │   ├── test_agent.py
│       │   ├── test_base.py
│       │   ├── test_chunks.py
│       │   ├── test_collections.py
│       │   ├── test_collections_users_interaction.py
│       │   ├── test_conversations.py
│       │   ├── test_documents.py
│       │   ├── test_filters.py
│       │   ├── test_graphs.py
│       │   ├── test_indices.py
│       │   ├── test_ingestion.py
│       │   ├── test_retrieval.py
│       │   ├── test_retrieval_advanced.py
│       │   ├── test_system.py
│       │   └── test_users.py
│       ├── scaling/
│       │   ├── __init__.py
│       │   └── loadTester.py
│       └── unit/
│           ├── agent/
│           │   ├── test_agent.py
│           │   ├── test_agent_citations.py
│           │   ├── test_agent_citations_old.py
│           │   ├── test_agent_old.py
│           │   └── test_streaming_agent.py
│           ├── app/
│           │   ├── test_config.py
│           │   └── test_routes.py
│           ├── conftest.py
│           ├── database/
│           │   ├── test_collections.py
│           │   ├── test_conversations.py
│           │   ├── test_graphs.py
│           │   └── test_limits.py
│           ├── document/
│           │   ├── test_chunks.py
│           │   ├── test_document_processing.py
│           │   └── test_documents.py
│           └── retrieval/
│               ├── __init__.py
│               ├── conftest.py
│               ├── test_citations.py
│               ├── test_database_filters.py
│               ├── test_rag_processing.py
│               └── test_retrieval_old.py
└── services/
    ├── README.md
    ├── clustering/
    │   ├── Dockerfile.clustering
    │   └── main.py
    └── unstructured/
        ├── Dockerfile.unstructured
        ├── README.md
        └── main.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
# GitHub Linguist overrides: classify HTML files and Jupyter notebooks as
# documentation, and everything under templates/ as vendored.
*.html linguist-documentation
*.ipynb linguist-documentation
templates/** linguist-vendored


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
 - OS: [e.g. iOS]
 - Browser: [e.g. chrome, safari]
 - Version: [e.g. 22]

**Smartphone (please complete the following information):**
 - Device: [e.g. iPhone6]
 - OS: [e.g. iOS8.1]
 - Browser: [e.g. stock browser, safari]
 - Version: [e.g. 22]

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/custom.md
================================================
---
name: Custom issue template
about: Describe this issue template's purpose here.
title: ''
labels: ''
assignees: ''

---


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/actions/login-docker/action.yml
================================================
# Composite action: authenticates the runner's Docker client against Docker Hub
# using the credentials supplied by the calling workflow.
name: 'Login Docker'
description: 'Logs in to Docker Hub'
inputs:
  docker_username:
    description: 'Docker Hub username'
    required: true
  docker_password:
    description: 'Docker Hub password or token'
    required: true
runs:
  using: "composite"
  steps:
    - name: Login to Docker Hub
      # v3 matches the login-action version used by the build workflows.
      uses: docker/login-action@v3
      with:
        username: ${{ inputs.docker_username }}
        password: ${{ inputs.docker_password }}


================================================
FILE: .github/actions/setup-docker/action.yml
================================================
# Composite action: installs a pinned Docker engine and enables Buildx on the
# runner.
name: 'Setup Docker'
description: 'Sets up Docker for running R2R'
runs:
  using: "composite"
  steps:
    - name: Set up Docker
      uses: docker-practice/actions-setup-docker@master
      with:
        # Quoted on purpose: an unquoted 20.10 is a YAML float and loses its
        # trailing zero (it would be read as 20.1).
        docker_version: "20.10"
        docker_buildx: true

    - name: Set up Docker Buildx
      # v3 matches the setup-buildx-action version used by the build workflows.
      uses: docker/setup-buildx-action@v3


================================================
FILE: .github/actions/setup-postgres-ext/action.yml
================================================
# Composite action: provisions PostgreSQL 15 with the pgvector extension.
# Exactly one of the steps below runs, selected by the `os` input.
name: 'Setup PostgreSQL'
description: 'Sets up PostgreSQL with pgvector'
inputs:
  # Compared verbatim against 'ubuntu-latest', 'windows-latest' and
  # 'macos-latest' by the step conditions; any other value runs no step.
  os:
    description: 'Operating system'
    required: true
runs:
  using: "composite"
  steps:
    # Ubuntu: replace any preinstalled PostgreSQL with PostgreSQL 15 + pgvector
    # from the PGDG apt repository, set the `postgres` password, create the
    # `vector` extension and raise max_connections.
    - name: Setup PostgreSQL on Ubuntu
      if: inputs.os == 'ubuntu-latest'
      shell: bash
      run: |
        # Start from a clean slate so the PGDG packages do not conflict with
        # whatever PostgreSQL version is already present on the runner.
        sudo apt-get purge -y 'postgresql-*'
        sudo rm -rf /var/lib/postgresql /var/log/postgresql /etc/postgresql

        echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
        wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo gpg --dearmor -o /usr/share/keyrings/postgresql-archive-keyring.gpg

        sudo apt-get update
        sudo apt-get install -y postgresql-15 postgresql-client-15 postgresql-15-pgvector

        sudo systemctl enable postgresql@15-main
        sudo systemctl start postgresql@15-main
        cd /
        sudo -u postgres /usr/lib/postgresql/15/bin/psql -c "ALTER USER postgres PASSWORD 'postgres';"
        sudo -u postgres /usr/lib/postgresql/15/bin/psql -c "CREATE EXTENSION vector;"

        # Set max_connections to 1024.
        # max_connections is only read at server start, so a reload would leave
        # the old limit in place; a full restart is required.
        echo "max_connections = 1024" | sudo tee -a /etc/postgresql/15/main/postgresql.conf
        sudo systemctl restart postgresql@15-main

    # Windows: install PostgreSQL 15 via Chocolatey, build pgvector v0.7.4 from
    # source with the MSVC toolchain, then create the `vector` extension and
    # raise max_connections.
    - name: Setup PostgreSQL on Windows
      if: inputs.os == 'windows-latest'
      shell: cmd
      # Ordering matters: CREATE EXTENSION must run after `nmake install`,
      # because the extension files do not exist in the PostgreSQL install
      # until pgvector has been built and installed.
      run: |

        echo Starting PostgreSQL setup and pgvector installation...

        echo Installing PostgreSQL...
        choco install postgresql15 --params "/Password:postgres" --force

        echo Updating PATH and setting PGPASSWORD...
        set PATH=%PATH%;C:\Program Files\PostgreSQL\15\bin
        set PGPASSWORD=postgres
        echo PATH updated and PGPASSWORD set.

        echo Altering PostgreSQL user password...
        psql -U postgres -c "ALTER USER postgres PASSWORD 'postgres';"
        echo PostgreSQL user password altered.

        echo Installing Visual Studio Build Tools...
        choco install visualstudio2022buildtools --package-parameters "--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --passive --norestart"
        echo Visual Studio Build Tools installed.

        echo Setting up Visual Studio environment...
        call "C:\Program Files\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
        echo Visual Studio environment set up.

        echo Cloning and building pgvector...
        set PGROOT=C:\Program Files\PostgreSQL\15
        cd /d %TEMP%
        git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git
        cd pgvector
        echo pgvector cloned.

        echo Building pgvector...
        nmake /F Makefile.win
        echo pgvector built.

        echo Installing pgvector...
        nmake /F Makefile.win install
        echo pgvector installed.

        echo Creating vector extension...
        psql -U postgres -c "CREATE EXTENSION vector;"
        echo Vector extension created.

        echo Setting max_connections to 1024...
        echo max_connections = 1024 >> "C:\Program Files\PostgreSQL\15\data\postgresql.conf"
        echo max_connections set.

        echo Restarting PostgreSQL service...
        net stop postgresql-x64-15
        net start postgresql-x64-15
        echo PostgreSQL service restarted.

        echo Setup complete!

    # macOS: install PostgreSQL 15 through Homebrew, create a `postgres`
    # superuser with a known password, build pgvector v0.7.4 from source against
    # the Homebrew pg_config, then raise max_connections and restart the service.
    # NOTE(review): unlike the Ubuntu and Windows steps, this step never runs
    # CREATE EXTENSION vector — confirm that is intentional.
    - name: Setup PostgreSQL on macOS
      if: inputs.os == 'macos-latest'
      shell: bash
      # The fixed `sleep 5` gives the freshly started service time to accept
      # connections before createuser/psql are invoked.
      run: |
        brew update
        brew install postgresql@15

        brew services start postgresql@15
        sleep 5
        /opt/homebrew/opt/postgresql@15/bin/createuser -s postgres
        /opt/homebrew/opt/postgresql@15/bin/psql -d postgres -c "ALTER USER postgres PASSWORD 'postgres';"

        cd /tmp
        git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git
        cd pgvector
        export PG_CONFIG=/opt/homebrew/opt/postgresql@15/bin/pg_config
        make
        make install # may need sudo

        # Set max_connections to 1024
        echo "max_connections = 1024" | sudo tee -a /opt/homebrew/var/postgresql@15/postgresql.conf
        brew services restart postgresql@15


================================================
FILE: .github/actions/setup-python-full/action.yml
================================================
# Composite action: prepares a Python environment for the "full" R2R setup.
# Installs the published `r2r` package, then uses uv to sync the project's
# `core` extra inside py/.
name: 'Setup Python for R2R Full'
description: 'Sets up Python and installs R2R dependencies for full installation'

inputs:
  os:
    description: 'Operating system'
    required: true
  python-version:
    description: 'Python version to use'
    required: false
    default: '3.12'

runs:
  using: "composite"
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}
        cache: 'pip'

    - name: Install R2R CLI & Python SDK
      shell: bash
      run: |
        pip install r2r

    # uv only needs to be installed once; the step used to be listed twice.
    - name: Install uv
      shell: bash
      run: |
        pip install uv

    # Reuse the virtualenv between runs; the key changes whenever the project
    # metadata or the lock file changes.
    - name: Cache uv dependencies
      uses: actions/cache@v4
      with:
        path: |
          py/.venv
          py/uv.lock
        key: ${{ runner.os }}-uv-${{ hashFiles('py/pyproject.toml', 'py/uv.lock') }}
        restore-keys: |
          ${{ runner.os }}-uv-

    - name: Install dependencies with uv
      shell: bash
      working-directory: py
      run: |
        uv sync --extra core


================================================
FILE: .github/actions/setup-python-light/action.yml
================================================
# Composite action: prepares a Python environment for the "light" R2R setup.
# Installs uv, restores the cached virtualenv, then syncs the project's `core`
# extra inside py/ and adds pip and wheel to that environment.
name: "Setup Python for R2R Light"
description: "Sets up Python environment and installs dependencies using uv"

inputs:
  os:
    description: "Operating system"
    required: true
  python-version:
    description: "Python version to use"
    required: false
    default: "3.12"

runs:
  using: composite
  steps:
    - name: Set up Python environment
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python-version }}
        cache: pip

    - name: Install uv
      shell: bash
      run: pip install uv

    # The cache key tracks both the project metadata and the lock file, so the
    # virtualenv is rebuilt whenever either one changes.
    - name: Cache uv dependencies
      uses: actions/cache@v4
      with:
        path: |
          py/.venv
          py/uv.lock
        key: ${{ runner.os }}-uv-${{ hashFiles('py/pyproject.toml', 'py/uv.lock') }}
        restore-keys: |
          ${{ runner.os }}-uv-

    - name: Install dependencies with uv
      working-directory: py
      shell: bash
      run: |
        uv sync --extra core
        uv pip install pip wheel


================================================
FILE: .github/actions/start-r2r-full/action.yml
================================================
# Composite action: builds a local R2R image from py/ and brings up the "full"
# Docker Compose stack using the `full_azure` configuration.
name: 'Start R2R Server'
description: 'Starts the R2R server'
runs:
  using: "composite"
  steps:
  # Fails the job early if the published image manifest cannot be fetched.
  - name: Inspect Docker image manifests
    shell: bash
    run: |
      docker manifest inspect ragtoriches/prod:latest

  # NOTE(review): the image is built and exported as `r2r/local`, but the final
  # CLI call passes `--image=r2r-local` — confirm which name is intended.
  # NOTE(review): after `cd py` the compose file is referenced as
  # `r2r/compose.full.yaml`; verify that path exists relative to py/.
  - name: Start R2R Server
    shell: bash
    run: |
      cd py
      docker build -t r2r/local .
      export R2R_CONFIG_NAME=full_azure
      export R2R_IMAGE=r2r/local
      docker compose -f r2r/compose.full.yaml --project-name r2r-full up -d
      uv run r2r serve --docker --full --config-name=full_azure --build --image=r2r-local


================================================
FILE: .github/actions/start-r2r-light/action.yml
================================================
# Composite action: launches the R2R server as a background process from py/
# with the requested configuration, then waits a fixed 30 seconds for startup.
name: "Start R2R Server"
description: "Starts the R2R server"
inputs:
  config-name:
    description: "The R2R configuration name to use"
    required: false
    default: "r2r_azure_with_test_limits"
runs:
  using: composite
  steps:
    - name: Start R2R server
      working-directory: py
      shell: bash
      # The server is backgrounded with `&` so the step can return; the sleep
      # is a fixed wait, not a readiness probe.
      run: |
        export R2R_CONFIG_NAME=${{ inputs.config-name }}
        uv run python -m r2r.serve &
        echo "Waiting for services to start..."
        sleep 30


================================================
FILE: .github/workflows/build-cluster-service-docker.yml
================================================
# Manually triggered workflow: builds the clustering service image for
# linux/amd64 and linux/arm64 and pushes it to Docker Hub as
# <REGISTRY_BASE>/cluster-prod:latest.
name: Build and Publish Cluster Service Docker Image

on:
  workflow_dispatch:

env:
  REGISTRY_BASE: ragtoriches

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # No Python/toml setup is needed here: this step only composes the image
      # name and nothing in the job reads pyproject.toml. The step id stays
      # `version` because later steps reference its output.
      - name: Determine version
        id: version
        run: |
          echo "REGISTRY_IMAGE=${{ env.REGISTRY_BASE }}/cluster-prod" >> $GITHUB_OUTPUT

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: ./services/clustering
          file: ./services/clustering/Dockerfile.clustering
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest
          provenance: false
          sbom: false

      # Sanity check that the pushed tag resolves to a manifest.
      - name: Verify manifest
        run: |
          docker buildx imagetools inspect ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest


================================================
FILE: .github/workflows/build-r2r-docker.yml
================================================
# Builds the R2R image once per architecture, pushes per-arch tags, then
# stitches them into multi-arch manifests for the release version and "latest".
# Manually triggered only.
name: Build and Publish R2R Docker Image

on:
  workflow_dispatch:

env:
  REGISTRY_IMAGE: sciphiai/r2r

jobs:
  # Reads the release version from py/pyproject.toml and emits the build matrix.
  prepare:
    runs-on: ubuntu-latest
    outputs:
      release_version: ${{ steps.version.outputs.RELEASE_VERSION }}
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # v5 matches the setup-python major used by the other workflows in this repo.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install toml package
        run: pip install toml

      - name: Determine version
        id: version
        run: |
          VERSION=$(python -c "import toml; print(toml.load('py/pyproject.toml')['project']['version'])")
          echo "RELEASE_VERSION=$VERSION" >> $GITHUB_OUTPUT

      # One matrix entry per architecture; `runner` is the runs-on label used
      # by the build job for that platform.
      - name: Set matrix
        id: set-matrix
        run: |
          echo "matrix={\"include\":[{\"platform\":\"amd64\",\"runner\":\"ubuntu-latest\"},{\"platform\":\"arm64\",\"runner\":\"arm64\"}]}" >> $GITHUB_OUTPUT

  # Builds and pushes <image>:<version>-<platform> and <image>:latest-<platform>.
  build:
    needs: prepare
    strategy:
      fail-fast: false
      matrix: ${{fromJson(needs.prepare.outputs.matrix)}}
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Echo Commit Hash
        run: |
          COMMIT_HASH=$(git rev-parse HEAD)
          echo "Building commit hash: $COMMIT_HASH"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: ./py
          file: ./py/Dockerfile
          platforms: ${{ matrix.platform }}
          no-cache: true
          push: true
          tags: |
            ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}-${{ matrix.platform }}
            ${{ env.REGISTRY_IMAGE }}:latest-${{ matrix.platform }}
          provenance: false
          sbom: false

  # Combines the per-arch version tags into multi-arch manifests.
  create-manifest:
    needs: [prepare, build]
    runs-on: ubuntu-latest
    steps:
      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      # Both the versioned and the "latest" manifest are assembled from the
      # same <version>-amd64 / <version>-arm64 images.
      - name: Create and push multi-arch manifest
        run: |
          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }} \
            ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}-amd64 \
            ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}-arm64

          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
            ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}-amd64 \
            ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}-arm64

      - name: Verify manifests
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ needs.prepare.outputs.release_version }}
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:latest

  # Terminal job that only runs once create-manifest and prepare have succeeded.
  success-check:
    needs: [create-manifest, prepare]
    runs-on: ubuntu-latest
    steps:
      - name: Always succeed
        run: exit 0


================================================
FILE: .github/workflows/build-unst-service-docker.yml
================================================
# Builds the unstructured service image for amd64 and arm64 and pushes it as
# <REGISTRY_BASE>/unst-prod:latest. Manually triggered only.
name: Build and Publish Unstructured Service Docker Image

on:
  workflow_dispatch:

env:
  REGISTRY_BASE: ragtoriches

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # NOTE(review): no later step in this workflow invokes Python or the
      # toml package; these two steps look like leftovers - confirm before removing.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install toml package
        run: pip install toml

      # Despite its name, this step only publishes the full image name as a
      # step output; the tag is always "latest".
      - name: Determine version
        id: version
        run: |
          echo "REGISTRY_IMAGE=${{ env.REGISTRY_BASE }}/unst-prod" >> $GITHUB_OUTPUT

      # Registers QEMU emulators so the arm64 half of the multi-platform build
      # can execute on the amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: ./services/unstructured
          file: ./services/unstructured/Dockerfile.unstructured
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest
          provenance: false
          sbom: false

      - name: Verify manifest
        run: |
          docker buildx imagetools inspect ${{ steps.version.outputs.REGISTRY_IMAGE }}:latest


================================================
FILE: .github/workflows/publish-to-npm.yml
================================================
# Builds the JS SDK in js/sdk and publishes it to the npm registry.
# Manually triggered only.
name: Publish NPM Package

on:
  workflow_dispatch:

jobs:
  publish:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: js/sdk
    steps:
      - uses: actions/checkout@v4

      # registry-url is set so that NODE_AUTH_TOKEN below is used for publishing.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          registry-url: 'https://registry.npmjs.org'

      # Use the same pnpm major (8) and action major (v4) as the JS SDK CI
      # workflow, so the package is published with the toolchain CI builds
      # it with. This was previously pinned to pnpm 6.0.2.
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8

      - name: Install dependencies
        run: pnpm install

      - name: Build
        run: pnpm run build

      - name: Publish to npm
        run: pnpm publish --no-git-checks
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}


================================================
FILE: .github/workflows/publish-to-pypi.yml
================================================
# Pushes to dev / dev-minor publish a timestamped alpha build to TestPyPI;
# a manual dispatch publishes the version in py/pyproject.toml to PyPI.
name: Publish to PyPI

on:
  workflow_dispatch:
  push:
    branches: [dev, dev-minor]

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install tools
        run: pip install twine tomlkit build

      # Push builds only: rewrite project.version to "<version>a<YYYYmmddHHMM>"
      # so every TestPyPI upload gets a distinct pre-release version.
      - name: Bump version for dev branches (TestPyPI)
        if: github.event_name == 'push'
        working-directory: py
        run: |
          old_version=$(python -c "import tomlkit; d=tomlkit.parse(open('pyproject.toml').read()); print(d['project']['version'])")
          new_version="${old_version}a$(date +'%Y%m%d%H%M')"
          python -c "import tomlkit; d=tomlkit.parse(open('pyproject.toml').read()); d['project']['version']='$new_version'; open('pyproject.toml','w').write(tomlkit.dumps(d))"

      - name: Build distributions
        working-directory: py
        run: python -m build

      - name: Publish to TestPyPI
        if: github.event_name == 'push'
        working-directory: py
        env:
          PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
          TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          twine upload --repository-url https://test.pypi.org/legacy/ -u __token__ -p "$TEST_PYPI_API_TOKEN" dist/*

      - name: Publish to PyPI
        if: github.event_name == 'workflow_dispatch'
        working-directory: py
        env:
          PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
          PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          twine upload -u __token__ -p "$PYPI_API_TOKEN" dist/*


================================================
FILE: .github/workflows/quality.yml
================================================
# Runs the repository's pre-commit hooks against every file on each push and
# pull request.
name: Code Quality Checks

on:
  push:
    branches: [ '**' ]
  pull_request:

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      # Action majors aligned with the other workflows in this repository
      # (checkout@v4, setup-python@v5).
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      # mypy and the type stub packages are installed here because the mypy
      # hook is a local "system" hook that runs `python -m mypy` from this
      # environment.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pre-commit
          pip install mypy
          pip install types-requests types-toml types-aiofiles

      - name: Run pre-commit hooks
        run: |
          pre-commit run --all-files


================================================
FILE: .github/workflows/r2r-full-py-integration-tests.yml
================================================
# Starts the full Docker-based R2R stack and runs the Python unit and
# integration test suites against it. Manually triggered only.
name: R2R Full Python Integration Test (ubuntu)

on:
  workflow_dispatch:

jobs:
  integration-test:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    env:
      TELEMETRY_ENABLED: 'false'
      R2R_PROJECT_NAME: r2r_default
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
      AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
      AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
      PYTHONUNBUFFERED: '1'
      PYTEST_ADDOPTS: '--color=yes'

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-full
        with:
          os: ubuntu-latest
          python-version: '3.12'

      - name: Setup and start Docker
        uses: ./.github/actions/setup-docker
        id: docker-setup

      - name: Login Docker
        uses: ./.github/actions/login-docker
        with:
          docker_username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          docker_password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      - name: Start R2R Full server
        uses: ./.github/actions/start-r2r-full

      # Polls every 5 seconds until curl can complete a request to the server,
      # giving up after 300 seconds.
      - name: Wait for server to be ready
        run: |
          timeout=300  # 5 minutes timeout
          while ! curl -s http://localhost:7272/health > /dev/null; do
            if [ $timeout -le 0 ]; then
              echo "Server failed to start within timeout"
              exit 1
            fi
            echo "Waiting for server to be ready..."
            sleep 5
            timeout=$((timeout - 5))
          done

      # The two test steps previously shared one name, which made them
      # indistinguishable in the run log; each is now named after its suite.
      - name: Run R2R Full Python Unit Tests
        run: |
          cd py && uv run pytest tests/unit \
            --verbose \
            --capture=no \
            --log-cli-level=INFO

      - name: Run R2R Full Python Integration Tests
        run: |
          cd py && uv run pytest tests/integration \
            --verbose \
            --capture=no \
            --log-cli-level=INFO

      # Runs only after an earlier step failed, to surface an error annotation.
      # The message points at the logs because this workflow uploads no artifact.
      - name: Check for test failures
        if: failure()
        run: |
          echo "::error::Tests failed. Check the logs of the preceding steps for details."
          exit 1

    services:
      redis:
        image: redis:latest
        ports:
          - 6379:6379
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5


================================================
FILE: .github/workflows/r2r-js-sdk-ci.yml
================================================
# Installs and builds the JS SDK whenever files under js/sdk change on main
# or in a pull request targeting main.
name: R2R JS SDK Integration CI

on:
  push:
    branches:
      - main
    paths:
      - 'js/sdk/**'
  pull_request:
    branches:
      - main
    paths:
      - 'js/sdk/**'

jobs:
  build-and-test:
    runs-on: ubuntu-latest

    # Every `run` step executes from the SDK directory.
    defaults:
      run:
        working-directory: ./js/sdk

    steps:
      - uses: actions/checkout@v4

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'

      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8

      - name: Install dependencies
        run: pnpm install

      - name: Build
        run: pnpm run build


================================================
FILE: .github/workflows/r2r-js-sdk-integration-tests.yml
================================================
# Runs the JS SDK Jest integration suites against a locally started R2R Light
# server, one matrix job per test file, on every push.
name: R2R JS SDK Integration Tests

on:
  push:
    branches:
      - '**'

jobs:
  # Smoke-checks that the toolchain installs and the server can be started.
  # NOTE(review): each matrix job below repeats these steps on its own runner,
  # so nothing started here is reused by them.
  setup:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-light
        with:
          os: ubuntu-latest
      - name: Setup and start PostgreSQL
        uses: ./.github/actions/setup-postgres-ext
        with:
          os: ubuntu-latest
      - name: Start R2R Light server
        uses: ./.github/actions/start-r2r-light
      # setup-node / pnpm action majors aligned with r2r-js-sdk-ci.yml (both v4).
      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20.x"
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8.x
          run_install: false
      - name: Install JS SDK dependencies
        working-directory: ./js/sdk
        run: pnpm install
      # Informational only: the `|| echo` keeps the step green when curl fails.
      - name: Check if R2R server is running
        run: |
          curl http://localhost:7272/v2/health || echo "Server not responding"

  v3-integration-tests:
    needs: setup
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        test-group:
          - ChunksIntegrationSuperUser.test.ts
          - CollectionsIntegrationSuperUser.test.ts
          - ConversationsIntegrationSuperUser.test.ts
          - DocumentsAndCollectionsIntegrationUser.test.ts
          - DocumentsIntegrationSuperUser.test.ts
          - GraphsIntegrationSuperUser.test.ts
          - PromptsIntegrationSuperUser.test.ts
          - RetrievalIntegrationSuperUser.test.ts
          - SystemIntegrationSuperUser.test.ts
          - SystemIntegrationUser.test.ts
          - UsersIntegrationSuperUser.test.ts
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
      AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
      AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
      TELEMETRY_ENABLED: 'false'
      R2R_POSTGRES_HOST: localhost
      R2R_POSTGRES_DBNAME: postgres
      R2R_POSTGRES_PORT: '5432'
      R2R_POSTGRES_PASSWORD: postgres
      R2R_POSTGRES_USER: postgres
      R2R_PROJECT_NAME: r2r_default
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-light
        with:
          os: ubuntu-latest
      - name: Setup and start PostgreSQL
        uses: ./.github/actions/setup-postgres-ext
        with:
          os: ubuntu-latest
      - name: Start R2R Light server
        uses: ./.github/actions/start-r2r-light
      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20.x"
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8.x
          run_install: false
      - name: Install JS SDK dependencies
        working-directory: ./js/sdk
        run: pnpm install
      # Runs only the Jest file assigned to this matrix entry.
      - name: Run remaining tests
        working-directory: ./js/sdk
        run: pnpm jest ${{ matrix.test-group }}


================================================
FILE: .github/workflows/r2r-light-py-integration-tests.yml
================================================
name: R2R Light Python Integration Test (ubuntu)

# Runs on pushes to main and on pull requests into dev, dev-minor or main when
# Python sources, workflow definitions or tests change; can also be started
# manually.
on:
  push:
    branches: [main]
    paths: ['py/**', '.github/workflows/**', 'tests/**']
  pull_request:
    branches: [dev, dev-minor, main]
    paths: ['py/**', '.github/workflows/**', 'tests/**']
  workflow_dispatch:

jobs:
  # Gate job: verifies the package installs in editable mode and that
  # `R2RClient` can be imported before the heavier test jobs run.
  package-install-test:
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # v5 matches the setup-python major used by the other workflows in this repo.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install package and test import
        run: |
          cd py
          pip install -e .
          python -c "from r2r import R2RClient; print('Import successful!')"

      # Runs only after an earlier step failed, to surface an error annotation.
      - name: Check for import errors
        if: failure()
        run: |
          echo "::error::Package installation or import test failed."
          exit 1

  # Starts R2R Light with the action's default configuration and runs the
  # tests under py/tests/unit against it.
  integration-test-azure-openai:
    needs: package-install-test
    runs-on: ubuntu-latest
    timeout-minutes: 20

    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
      AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
      AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
      AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
      MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
      TELEMETRY_ENABLED: 'false'
      R2R_POSTGRES_HOST: localhost
      R2R_POSTGRES_DBNAME: postgres
      R2R_POSTGRES_PORT: '5432'
      R2R_POSTGRES_PASSWORD: postgres
      R2R_POSTGRES_USER: postgres
      R2R_PROJECT_NAME: r2r_default
      PYTHONUNBUFFERED: '1'
      PYTEST_ADDOPTS: '--color=yes'

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Poppler
        run: |
          sudo apt-get update
          sudo apt-get install -y poppler-utils

      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-light
        with:
          os: ubuntu-latest
          python-version: '3.12'

      - name: Setup and start PostgreSQL
        uses: ./.github/actions/setup-postgres-ext
        with:
          os: ubuntu-latest

      - name: Verify PostgreSQL and Vector Extension
        run: |
          pg_isready -h localhost -p 5432
          sudo -u postgres psql -c "\dx vector;"

      - name: Start R2R Light server
        uses: ./.github/actions/start-r2r-light
        id: start-server

      # Polls every 5 seconds until curl can complete a request to the server,
      # giving up after 300 seconds.
      - name: Wait for server to be ready
        run: |
          timeout=300  # 5 minutes timeout
          while ! curl -s http://localhost:7272/health > /dev/null; do
            if [ $timeout -le 0 ]; then
              echo "Server failed to start within timeout"
              exit 1
            fi
            echo "Waiting for server to be ready..."
            sleep 5
            timeout=$((timeout - 5))
          done

      # Named after the suite it actually runs (tests/unit).
      - name: Run R2R Light Python Unit Tests
        run: |
          cd py && uv run pytest tests/unit \
            --verbose \
            --capture=no \
            --log-cli-level=INFO

      # Runs only after an earlier step failed, to surface an error annotation.
      # The message points at the logs because this job uploads no artifact.
      - name: Check for test failures
        if: failure()
        run: |
          echo "::error::Unit tests failed. Check the logs of the preceding steps for details."
          exit 1

  # Starts R2R Light with the `gemini` configuration and runs the tests under
  # py/tests/unit against it.
  integration-test-gemini:
    needs: package-install-test
    runs-on: ubuntu-latest
    timeout-minutes: 20

    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
      AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
      AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
      AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
      MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
      TELEMETRY_ENABLED: 'false'
      R2R_POSTGRES_HOST: localhost
      R2R_POSTGRES_DBNAME: postgres
      R2R_POSTGRES_PORT: '5432'
      R2R_POSTGRES_PASSWORD: postgres
      R2R_POSTGRES_USER: postgres
      R2R_PROJECT_NAME: r2r_default
      PYTHONUNBUFFERED: '1'
      PYTEST_ADDOPTS: '--color=yes'

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Poppler
        run: |
          sudo apt-get update
          sudo apt-get install -y poppler-utils

      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-light
        with:
          os: ubuntu-latest
          python-version: '3.12'

      - name: Setup and start PostgreSQL
        uses: ./.github/actions/setup-postgres-ext
        with:
          os: ubuntu-latest

      - name: Verify PostgreSQL and Vector Extension
        run: |
          pg_isready -h localhost -p 5432
          sudo -u postgres psql -c "\dx vector;"

      # GEMINI_API_KEY is also set at job level; it is repeated on this step.
      - name: Start R2R Light server with Gemini config
        uses: ./.github/actions/start-r2r-light
        id: start-server
        with:
          config-name: gemini
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}

      # Polls every 5 seconds until curl can complete a request to the server,
      # giving up after 300 seconds.
      - name: Wait for server to be ready
        run: |
          timeout=300  # 5 minutes timeout
          while ! curl -s http://localhost:7272/health > /dev/null; do
            if [ $timeout -le 0 ]; then
              echo "Server failed to start within timeout"
              exit 1
            fi
            echo "Waiting for server to be ready..."
            sleep 5
            timeout=$((timeout - 5))
          done

      # Named after the suite it actually runs (tests/unit).
      - name: Run R2R Light Python Unit Tests
        run: |
          cd py && uv run pytest tests/unit \
            --verbose \
            --capture=no \
            --log-cli-level=INFO

      # Runs only after an earlier step failed, to surface an error annotation.
      # The message points at the logs because this job uploads no artifact.
      - name: Check for test failures
        if: failure()
        run: |
          echo "::error::Gemini unit tests failed. Check the logs of the preceding steps for details."
          exit 1

  # Fans the integration suite out into one job per test module; each job
  # provisions its own PostgreSQL instance and R2R Light server.
  integration-test-azure-openai-full:
    needs: integration-test-azure-openai
    runs-on: ubuntu-latest
    timeout-minutes: 20
    strategy:
      fail-fast: false
      matrix:
        # `name` labels the test step, `path` is handed to pytest (relative to py/).
        test-group:
          - { name: "agent", path: "tests/integration/test_agent.py" }
          # - { name: "base", path: "tests/integration/test_base.py" }
          - { name: "chunks", path: "tests/integration/test_chunks.py" }
          - { name: "collections", path: "tests/integration/test_collections.py" }
          - { name: "collections_users_interaction", path: "tests/integration/test_collections_users_interaction.py" }
          - { name: "conversations", path: "tests/integration/test_conversations.py" }
          - { name: "documents", path: "tests/integration/test_documents.py" }
          - { name: "filters", path: "tests/integration/test_filters.py" }
          - { name: "graphs", path: "tests/integration/test_graphs.py" }
          - { name: "indices", path: "tests/integration/test_indices.py" }
          - { name: "ingestion", path: "tests/integration/test_ingestion.py" }
          - { name: "retrieval", path: "tests/integration/test_retrieval.py" }
          - { name: "retrieval_advanced", path: "tests/integration/test_retrieval_advanced.py" }
          # - { name: "system", path: "tests/integration/test_system.py" }
          - { name: "users", path: "tests/integration/test_users.py" }
    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
      AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
      AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
      AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
      MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
      TELEMETRY_ENABLED: "false"
      R2R_POSTGRES_HOST: localhost
      R2R_POSTGRES_DBNAME: postgres
      R2R_POSTGRES_PORT: "5432"
      R2R_POSTGRES_PASSWORD: postgres
      R2R_POSTGRES_USER: postgres
      R2R_PROJECT_NAME: r2r_default
      PYTHONUNBUFFERED: "1"
      PYTEST_ADDOPTS: "--color=yes"

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Poppler
        run: |
          sudo apt-get update
          sudo apt-get install -y poppler-utils

      - name: Set up Python and install dependencies
        uses: ./.github/actions/setup-python-light
        with:
          os: ubuntu-latest
          python-version: "3.12"

      - name: Setup and start PostgreSQL
        uses: ./.github/actions/setup-postgres-ext
        with:
          os: ubuntu-latest

      - name: Verify PostgreSQL and Vector Extension
        run: |
          pg_isready -h localhost -p 5432
          sudo -u postgres psql -c "\dx vector;"

      - name: Start R2R Light server
        id: start-server
        uses: ./.github/actions/start-r2r-light

      # Retry every 5 seconds until curl completes a request; abort once the
      # 300-second budget is spent.
      - name: Wait for server to be ready
        run: |
          remaining=300  # seconds (5 minutes)
          until curl -s http://localhost:7272/health > /dev/null; do
            if [ $remaining -le 0 ]; then
              echo "Server failed to start within timeout"
              exit 1
            fi
            echo "Waiting for server to be ready..."
            sleep 5
            remaining=$((remaining - 5))
          done

      - name: Run R2R Integration Test - ${{ matrix.test-group.name }}
        run: |
          cd py && uv run pytest ${{ matrix.test-group.path }} --verbose --capture=no --log-cli-level=INFO

      # Runs only after an earlier step failed, to surface an error annotation.
      - name: Check for test failures
        if: failure()
        run: |
          echo "::error::Integration tests failed. Check the test results artifact for details."
          exit 1


================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit configuration. The `exclude` values are regular expressions, so
# the leading dot of ".venv" is escaped to match a literal "." rather than any
# character.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.0
    hooks:
      - id: trailing-whitespace
        exclude: ^\.venv/
      - id: end-of-file-fixer
        exclude: ^\.venv/
      - id: check-added-large-files
        exclude: ^\.venv/
      - id: check-ast
        exclude: ^\.venv/
      - id: check-yaml
        exclude: ^(\.venv/|deployment/)

  # Fails when a non-test, non-migration file matching */py/*.py imports Dict,
  # List or Union from typing. The script finds files itself (pass_filenames is
  # false). NOTE(review): the `[^d]Dict` pattern presumably exists to let
  # `TypedDict` through - confirm.
  - repo: local
    hooks:
      - id: check-typing-imports
        name: Check for Dict, List, or Union usage
        entry: bash -c 'echo "Checking for typing imports..." && FOUND=$(cd "$(git rev-parse --show-toplevel)" && find . -path "*/py/*.py" | grep -v "venv" | grep -v "/.venv/" | grep -v "/site-packages/" | grep -v "test_" | grep -v "/migrations/" | xargs grep -l "from typing.*import.*[^d]Dict\\|from typing.*import.*List\\|from typing.*import.*Union" 2>/dev/null || echo "") && if [ -n "$FOUND" ]; then echo "$FOUND"; echo "  Please import dict instead of Dict, list instead of List, and the logical OR operator"; exit 1; else echo "No problematic imports found!"; exit 0; fi'
        language: system
        types: [python]
        pass_filenames: false

  # Fails when a file matching */py/*.py outside tests, examples and
  # migrations contains "print(".
  - repo: local
    hooks:
      - id: check-print-statements
        name: Check for print statements
        entry: bash -c 'echo "Checking for print statements..." && FOUND=$(cd "$(git rev-parse --show-toplevel)" && find . -path "*/py/*.py" | grep -v "venv" | grep -v "/.venv/" | grep -v "/site-packages/" | grep -v "test_" | grep -v "/core/examples/" | grep -v "/migrations/" | grep -v "/tests/" | grep -v "/examples.py" | xargs grep -l "print(" 2>/dev/null || echo "") && if [ -n "$FOUND" ]; then echo "$FOUND"; echo "Found print statements!"; exit 1; else echo "No print statements found!"; exit 0; fi'
        language: system
        types: [python]
        pass_filenames: false
        exclude: ^(\.venv/|py/\.venv/|py/core/examples/|py/migrations/|py/tests/)

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.6
    hooks:
      - id: ruff
        args: [--fix]
        files: ^py/
        exclude: ^(py/tests/|\.venv/)
      - id: ruff-format
        files: ^py/
        exclude: ^(py/tests/|\.venv/)

  # Runs mypy from the py/ directory using the interpreter on PATH
  # (language: system), so mypy must already be installed there.
  - repo: local
    hooks:
      - id: mypy
        name: mypy
        entry: bash -c 'cd "$(git rev-parse --show-toplevel)/py" && python -m mypy --exclude "migrations" --exclude "venv*" --exclude "test_*" .'
        language: system
        types: [python]
        pass_filenames: false
        exclude: ^(\.venv/|migrations/)


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct Summary

TL;DR: Be nice. Be respectful. Be professional. Don't be a jerk.

## Commitment

We strive for a harassment-free, inclusive, and healthy community experience for all, regardless of personal characteristics or background.

## Expected Behaviors

- **Empathy and Kindness**: Show understanding and kindness to others.
- **Respect**: Value different viewpoints and experiences.
- **Constructive Feedback**: Offer and accept feedback graciously.
- **Accountability**: Own up to mistakes and learn from them.
- **Community Focus**: Prioritize what's best for the whole community.

## Unacceptable Behaviors

- **Sexualized Content**: Avoid sexual language and unwelcome sexual attention.
- **Disrespect**: No trolling, insults, or derogatory comments.
- **Harassment**: Public or private harassment is unacceptable.
- **Privacy Violations**: Do not share private information without consent.
- **Inappropriate Conduct**: Behavior not suitable for a professional setting is not allowed.

## Enforcement

- **Leaders' Responsibility**: Leaders clarify standards and take corrective actions.
- **Scope**: Applies to all community spaces and when representing the community.
- **Reporting**: Incidents can be reported to owen@sciphi.ai.

## Enforcement Guidelines

- **Correction**: Private warning for unprofessional behavior.
- **Warning**: Consequences for repeated violations.
- **Temporary Ban**: For serious or sustained inappropriate behavior.
- **Permanent Ban**: For egregious violations, including harassment.

## Attribution

Adapted from the [Contributor Covenant version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html), with Community Impact Guidelines inspired by [Mozilla's code of conduct enforcement ladder](https://www.mozilla.org/en-US/about/governance/policies/participation/).

For more details and FAQs, visit [https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq). Translations are available [here](https://www.contributor-covenant.org/translations).


================================================
FILE: CONTRIBUTING.md
================================================
# R2R Contribution Guide

## Quick Start

- **Pre-Discussion**: Feel free to propose your ideas via issues or [Discord](https://discord.gg/p6KqD2kjtB) if you want to get early feedback.
- **Code of Conduct**: Adhere to our [Code of Conduct](./CODE_OF_CONDUCT.md) in all interactions.
- **Pull Requests (PRs)**: Follow the PR process for contributions.

## Pull Request Process

1. **Dependencies**: Ensure all dependencies are necessary and documented.
2. **Documentation**: Update README.md with any changes to interfaces, including new environment variables, exposed ports, and other relevant details.
3. **Versioning**: Increment version numbers in examples and README.md following [SemVer](http://semver.org/).
4. **Review**: A PR can be merged after receiving approval from at least two other developers. If you lack merge permissions, request a review for merging.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4/).


================================================
FILE: LICENSE.md
================================================
The MIT License (MIT)

Copyright (c) 2024 EmergentAGI Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: MANIFEST.md
================================================
# The R2R Manifest

We will do our best to build useful AI tools for developers _(before AGI)_.


================================================
FILE: SECURITY.md
================================================

# Security Policy

At R2R, we take the security of our project and its users seriously. We appreciate the contributions of security researchers and developers in helping us identify and address potential vulnerabilities.

## Reporting a Vulnerability

If you discover a potential security vulnerability in R2R, please follow these steps to report it:

1. Create a new issue on the GitHub repository using the "Vulnerability Disclosure" issue template.
2. Set the issue as "confidential" if you are unsure whether the issue is a potential vulnerability or not. It is easier to make a confidential issue public than to remediate an issue that should have been confidential.
3. Label the issue with the `security` label at a minimum. Additional labels may be applied by the security team and other project maintainers to assist with the triage process.
4. Provide a detailed description of the vulnerability, including steps to reproduce, potential impact, and any other relevant information.
5. If the issue contains sensitive information or user-specific data, such as private repository contents, assign the `keep confidential` label to the issue. If possible, avoid including such information directly in the issue and instead provide links to resources that are only accessible to the project maintainers.

## Vulnerability Handling Process

Once a vulnerability is reported, the R2R security team will follow these steps:

1. Acknowledge receipt of the vulnerability report within 48 hours.
2. Assess the severity and impact of the vulnerability.
3. Develop a fix or mitigation plan for the vulnerability.
4. Notify the reporter about the progress and estimated timeline for the fix.
5. Once the fix is ready, release a new version of R2R that addresses the vulnerability.
6. Publicly disclose the vulnerability and the fix after a reasonable period to allow users to update their installations.

## Scope

This security policy applies to the R2R codebase and to the way R2R uses its dependencies. It does not cover vulnerabilities that originate in the underlying operating systems, hardware, or third-party libraries themselves; please report those to their respective maintainers.

## Recognition

We greatly appreciate the efforts of security researchers and developers who responsibly disclose vulnerabilities to us. With your permission, we will acknowledge your contribution in the release notes and any public disclosures related to the vulnerability.

## Contact

If you have any questions or concerns regarding the security of R2R, please contact the project maintainers at [security@r2r.com](mailto:security@r2r.com).

Thank you for helping us keep R2R and its users secure!


================================================
FILE: deployment/k8s/kustomizations/helm-values_hatchet.yaml
================================================
# sharedConfig is inherited by all backend services: api, grpc, controllers, scheduler
# NOTE: the flag-like values below are quoted strings ("t" / "true"), not YAML booleans.
# NOTE(review): every address here points at localhost — presumably meant for local or
# port-forwarded access; confirm before using these values in an exposed environment.
sharedConfig:
  # you can disable shared config by setting this to false
  enabled: true

  # these are the most commonly configured values
  serverUrl: "http://localhost:8080"
  serverAuthCookieDomain: "localhost:8080" # the domain for the auth cookie
  serverAuthCookieInsecure: "t" # allows cookies to be set over http
  serverAuthSetEmailVerified: "t" # automatically sets email_verified to true for all users
  serverAuthBasicAuthEnabled: "t" # allows login via basic auth (email/password)
  grpcBroadcastAddress: "localhost:7070" # the endpoint for the gRPC server, exposed via the `grpc` service
  grpcInsecure: "true" # allows gRPC to be served over http
#  defaultAdminEmail: "" # in exposed/production environments, change this to a valid email
#  defaultAdminPassword: "" # in exposed/production environments, change this to a secure password

  # you can set additional environment variables here, which will override any defaults
  env: {}

# Values for the Hatchet API component: two replicas of the hatchet-api image.
api:
  enabled: true
  replicaCount: 2
  image:
    repository: "ghcr.io/hatchet-dev/hatchet/hatchet-api"
    tag: "v0.54.7"
    pullPolicy: "Always"
  # Only the migration image repository is overridden; no tag is set in this file.
  migrationJob:
    image:
      repository: "ghcr.io/hatchet-dev/hatchet/hatchet-migrate"
  serviceAccount:
    create: true
    name: hatchet-api
  # Environment variables are loaded from the `hatchet-shared-config` Secret.
  envFrom:
    - secretRef:
        name: hatchet-shared-config
  ingress:
    enabled: false
  health:
    enabled: true
    spec:
      # Liveness: HTTP GET /api/live on port 8080, every 5s after a 60s initial delay.
      livenessProbe:
        httpGet:
          path: /api/live
          port: 8080
        periodSeconds: 5
        initialDelaySeconds: 60
      # Readiness: HTTP GET /api/ready on port 8080, every 5s after a 20s initial delay.
      readinessProbe:
        httpGet:
          path: /api/ready
          port: 8080
        periodSeconds: 5
        initialDelaySeconds: 20

# Values for the gRPC component. It runs the hatchet-engine image and binary,
# the same ones used by the `controllers` and `scheduler` sections of this file.
grpc:
  enabled: true
  nameOverride: hatchet-grpc
  fullnameOverride: hatchet-grpc
  replicaCount: 1
  image:
    repository: "ghcr.io/hatchet-dev/hatchet/hatchet-engine"
    tag: "v0.54.7"
    pullPolicy: "Always"
  setupJob:
    enabled: false
  # Service port 7070 matches the port in sharedConfig.grpcBroadcastAddress.
  service:
    externalPort: 7070
    internalPort: 7070
  commandline:
    command: ["/hatchet/hatchet-engine"]
  deployment:
    annotations:
      app.kubernetes.io/name: hatchet-grpc
  serviceAccount:
    create: true
    name: hatchet-grpc
  # Environment variables are loaded from the `hatchet-shared-config` Secret.
  envFrom:
    - secretRef:
        name: hatchet-shared-config
  ingress:
    enabled: false
  health:
    enabled: true
    spec:
      # Health endpoints are probed on port 8733, separate from the 7070 service port.
      livenessProbe:
        httpGet:
          path: /live
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 60
      readinessProbe:
        httpGet:
          path: /ready
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 20

# Values for the controllers component. It runs the hatchet-engine image and binary,
# the same ones used by the `grpc` and `scheduler` sections of this file.
controllers:
  enabled: true
  nameOverride: controllers
  fullnameOverride: controllers
  replicaCount: 1
  image:
    repository: "ghcr.io/hatchet-dev/hatchet/hatchet-engine"
    tag: "v0.54.7"
    pullPolicy: "Always"
  setupJob:
    enabled: false
  service:
    externalPort: 7070
    internalPort: 7070
  commandline:
    command: ["/hatchet/hatchet-engine"]
  deployment:
    annotations:
      app.kubernetes.io/name: controllers
  serviceAccount:
    create: true
    name: controllers
  # Environment variables are loaded from the `hatchet-shared-config` Secret.
  envFrom:
    - secretRef:
        name: hatchet-shared-config
  ingress:
    enabled: false
  health:
    enabled: true
    spec:
      # Health endpoints are probed on port 8733, separate from the 7070 service port.
      livenessProbe:
        httpGet:
          path: /live
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 60
      readinessProbe:
        httpGet:
          path: /ready
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 20

# Values for the scheduler component. It runs the hatchet-engine image and binary,
# the same ones used by the `grpc` and `controllers` sections of this file.
scheduler:
  enabled: true
  nameOverride: scheduler
  fullnameOverride: scheduler
  replicaCount: 1
  image:
    repository: "ghcr.io/hatchet-dev/hatchet/hatchet-engine"
    tag: "v0.54.7"
    pullPolicy: "Always"
  setupJob:
    enabled: false
  service:
    externalPort: 7070
    internalPort: 7070
  commandline:
    command: ["/hatchet/hatchet-engine"]
  deployment:
    annotations:
      app.kubernetes.io/name: scheduler
  serviceAccount:
    create: true
    name: scheduler
  # Environment variables are loaded from the `hatchet-shared-config` Secret.
  envFrom:
    - secretRef:
        name: hatchet-shared-config
  ingress:
    enabled: false
  health:
    enabled: true
    spec:
      # Health endpoints are probed on port 8733, separate from the 7070 service port.
      livenessProbe:
        httpGet:
          path: /live
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 60
      readinessProbe:
        httpGet:
          path: /ready
          port: 8733
        periodSeconds: 5
        initialDelaySeconds: 20

# Values for the Hatchet frontend component.
frontend:
  enabled: true
  image:
    repository: "ghcr.io/hatchet-dev/hatchet/hatchet-frontend"
    tag: "v0.54.7"
    pullPolicy: "Always"
  # Service port 8080 is mapped to container port 80.
  service:
    externalPort: 8080
    internalPort: 80
  ingress:
    enabled: false

# PostgreSQL section. It is disabled here (`enabled: false`), so the values below
# presumably only matter if it is switched back on — TODO confirm against the chart.
postgres:
  enabled: false
  auth:
    # Credentials are intentionally left commented out; only the database name is set.
#    username: ""
#    password: ""
    database: "hatchet"
  tls:
    enabled: false
  primary:
    service:
      ports:
        postgresql: 5432

# RabbitMQ section (enabled), AMQP service port 5672.
rabbitmq:
  enabled: true
  # NOTE(review): with both children commented out, `auth` parses as null rather
  # than an empty map — confirm the chart tolerates a null `auth` value.
  auth:
#    username: ""
#    password: ""
  service:
    ports:
      amqp: 5672

caddy:
  enabled: false


================================================
FILE: deployment/k8s/kustomizations/helm-values_postgresql.yaml
================================================
# Passwords are not set inline: they are read from the pre-existing Secret
# `r2r-hatchet-secrets`, using the key names mapped below.
auth:
  existingSecret: r2r-hatchet-secrets
  secretKeys:
    adminPasswordKey: HATCHET_DATABASE_POSTGRES_POSTGRES_PASSWORD
    userPasswordKey: HATCHET_DATABASE_POSTGRES_PASSWORD
    replicationPasswordKey: HATCHET_DATABASE_POSTGRES_REPLICA_PASSWORD

#creates hatchet database
global:
  # NOTE(review): `csi-sc` looks like a cluster-specific StorageClass name — confirm it exists in the target cluster.
  storageClass: csi-sc
  postgresql:
    auth:
      database: hatchet


================================================
FILE: deployment/k8s/kustomizations/include/cm-hatchet.yaml
================================================
---
# hatchet-configmap.yaml
# Non-secret environment settings for Hatchet, exposed as a ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hatchet-configmap
  annotations:
    # Negative sync wave: Argo CD applies this before resources in higher-numbered waves.
    argocd.argoproj.io/sync-wave: "-2"
data:
  #New
  HATCHET_CLIENT_TLS_STRATEGY: "none"
  # 134217728 bytes = 128 MiB (128 * 1024 * 1024).
  HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "134217728"
  HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "134217728"

  # The ALLOW_OVERRIDE flags are all "false"; setup-config.sh reads the _CONF flag
  # as the value of `--overwrite` for `hatchet-admin quickstart`.
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF: "false"
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CERT: "false"
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_APIKEY: "false"
  # Tenant ID passed to `hatchet-admin token create --tenant-id` by setup-token.sh.
  HATCHET_TENANT_ID: "707d0855-80ab-4e1f-a156-f1c4546cbf52"
  RABBITMQ_URL: "http://hatchet-rabbitmq"
  RABBITMQ_MGMT_PORT: "15672"


================================================
FILE: deployment/k8s/kustomizations/include/cm-hatchet_OLD.yaml
================================================
---
# hatchet-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: hatchet-configmap
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
data:
#  DATABASE_POSTGRES_HOST: "hatchet-postgres"
  DATABASE_POSTGRES_HOST: "ferretdb-postgres-documentdb"
  DATABASE_POSTGRES_PORT: "5432"
  SERVER_AUTH_COOKIE_INSECURE: "t"
  SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
  SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
  SERVER_GRPC_INSECURE: "t"
  SERVER_AUTH_COOKIE_DOMAIN: "https://r2r.mywebsite.com"
  SERVER_URL: "http://hatchet-dashboard:80"

  HATCHET_DATABASE_POSTGRES_HOST: "ferretdb-postgres-documentdb"
  HATCHET_DATABASE_POSTGRES_PORT: "5432"
  SERVER_GRPC_PORT: "7077"
  SERVER_GRPC_MAX_MSG_SIZE: "134217728"


  HATCHET_DATABASE_POSTGRES_DB_NAME: "hatchet"
  #SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
  #SERVER_URL: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_APIKEY: "false"
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF: "false"
  HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CERT: "false"
  HATCHET_TENANT_ID: "707d0855-80ab-4e1f-a156-f1c4546cbf52"
#  R2R_RABBITMQ_PORT: "5672"
  RABBITMQ_MGMT_PORT: "15672"
  RABBITMQ_URL: "http://hatchet-rabbitmq"

  #New
  HATCHET_CLIENT_TLS_STRATEGY: "none"
  HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "134217728"
  HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "134217728"


================================================
FILE: deployment/k8s/kustomizations/include/cm-init-scripts-hatchet.yaml
================================================
# This file contains the initialization scripts used by the InitContainers in the Job manifests.

apiVersion: v1
kind: ConfigMap
metadata:
  name: hatchet-init-scripts
data:
  create-db.sh: |
    #!/bin/sh
    # Wait until PostgreSQL accepts connections, then create the Hatchet
    # database unless it already exists.
    #
    # Optional environment overrides (defaults in parentheses):
    #   DATABASE_POSTGRES_HOST     (hatchet-postgres)
    #   DATABASE_POSTGRES_PORT     (5432)
    #   DATABASE_POSTGRES_USERNAME (hatchet_user)
    #   DATABASE_POSTGRES_PASSWORD (hatchet_password)
    #   DATABASE_POSTGRES_DB_NAME  (hatchet)
    set -e

    DATABASE_POSTGRES_HOST="${DATABASE_POSTGRES_HOST:-hatchet-postgres}"
    # Without a fallback, an unset port left a bare `-p` that swallowed the next option.
    DATABASE_POSTGRES_PORT="${DATABASE_POSTGRES_PORT:-5432}"
    DB_USER="${DATABASE_POSTGRES_USERNAME:-hatchet_user}"
    DB_NAME="${DATABASE_POSTGRES_DB_NAME:-hatchet}"
    # Exported once so that psql and createdb both pick it up.
    export PGPASSWORD="${DATABASE_POSTGRES_PASSWORD:-hatchet_password}"

    echo 'Waiting for PostgreSQL to be ready...'
    while ! pg_isready -h "$DATABASE_POSTGRES_HOST" -p "$DATABASE_POSTGRES_PORT" -U "$DB_USER"; do
      sleep 1
    done
    echo 'PostgreSQL is ready, checking if database exists...'
    # Compare only the first (database name) column and require an exact match:
    # grepping whole rows would also match the owner column or names such as
    # "hatchet-test", and then wrongly skip creation.
    if ! psql -h "$DATABASE_POSTGRES_HOST" -p "$DATABASE_POSTGRES_PORT" -U "$DB_USER" -lqtA | cut -s -d '|' -f 1 | grep -qxF "$DB_NAME"; then
      echo 'Database does not exist, creating it...'
      createdb -h "$DATABASE_POSTGRES_HOST" -p "$DATABASE_POSTGRES_PORT" -U "$DB_USER" -w "$DB_NAME"
    else
      echo 'Database already exists, skipping creation.'
    fi

  setup-config.sh: |
    #!/bin/sh
    # Generate the Hatchet configuration with `hatchet-admin quickstart`.
    #
    # Environment:
    #   HATCHET_CLIENT_TLS_STRATEGY            "none" skips certificate creation;
    #                                          any other value writes certs to /hatchet/certs
    #   HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF value passed to --overwrite (default: false)
    #
    # The shebang and `set -e` match the other scripts in this ConfigMap, so the
    # script can be executed directly and stops on the first failing command.
    set -e

    echo '>>> Starting config creation process...'
    if [ "${HATCHET_CLIENT_TLS_STRATEGY}" = "none" ]; then
      echo "HATCHET_CLIENT_TLS_STRATEGY is set to none, skipping certificate creation."
      /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite="${HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF:-false}"
    else
      echo "HATCHET_CLIENT_TLS_STRATEGY is not none, creating certificates."
      /hatchet/hatchet-admin quickstart --cert-dir /hatchet/certs --generated-config-dir /hatchet/config --overwrite="${HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF:-false}"
    fi

  setup-token.sh: |
    #!/bin/sh
    # Create a Hatchet API token with hatchet-admin, store it in
    # /tmp/hatchet_api_key and /hatchet_api_key/api_key.txt, and print some
    # sanity diagnostics about its structure.
    #
    # Environment:
    #   HATCHET_TENANT_ID  tenant the token is created for.
    #   NOTE(review): the fallback below is not a well-formed UUID (its last
    #   group has 8 hex digits instead of 12) — confirm whether it is ever used.
    #
    # Exits 1 if the token cannot be created, extracted or copied.
    set -e

    echo '>>> Starting token creation process...'
    # Attempt to create token and capture both stdout and stderr. The `||`
    # handler keeps `set -e` from aborting before the output can be reported.
    TOKEN_OUTPUT=$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id "${HATCHET_TENANT_ID:-00000000-0000-0000-0000-00000000}" 2>&1) || {
      echo 'Error: Failed to create token. Full command output:' >&2
      echo "$TOKEN_OUTPUT" >&2
      exit 1
    }
    # Extract the token (assuming it's the only part that looks like a JWT).
    # `|| true`: grep exits 1 when nothing matches, which under `set -e` would
    # end the script silently and make the check below unreachable.
    TOKEN=$(echo "$TOKEN_OUTPUT" | grep -Eo 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*' || true)

    if [ -z "$TOKEN" ]; then
      echo 'Error: Failed to extract token. Full command output:' >&2
      echo "$TOKEN_OUTPUT" >&2
      exit 1
    fi

    echo "$TOKEN" > /tmp/hatchet_api_key
    echo 'Token created and saved to /tmp/hatchet_api_key'
    # Copy token to final destination
    #mkdir -p /hatchet_api_key/
    echo -n "$TOKEN" > /hatchet_api_key/api_key.txt
    echo '>>> Token copied to /hatchet_api_key/api_key.txt'

    # Verify token was copied correctly ($(...) strips the trailing newline
    # that only the /tmp copy has).
    if [ "$(cat /tmp/hatchet_api_key)" != "$(cat /hatchet_api_key/api_key.txt)" ]; then
      echo 'Error: Token copy failed, files do not match' >&2
      echo 'Content of /tmp/hatchet_api_key:'
      cat /tmp/hatchet_api_key
      exit 1
    fi

    echo 'Hatchet API key has been saved successfully'
    echo 'Token length:' ${#TOKEN}
    # `cut` instead of ${TOKEN:0:20}: substring expansion is not POSIX sh.
    echo 'Token (first 20 chars):' "$(printf '%s' "$TOKEN" | cut -c1-20)"
    echo 'Token structure:' $(echo "$TOKEN" | awk -F. '{print NF-1}') 'parts'
    # Check each part of the token
    for i in 1 2 3; do
      PART=$(echo "$TOKEN" | cut -d. -f$i)
      echo 'Part' $i 'length:' ${#PART}
      echo 'Part' $i 'base64 check:' $(echo "$PART" | base64 -d >/dev/null 2>&1 && echo 'Valid' || echo 'Invalid')
    done
    # Final validation attempt
    if ! echo "$TOKEN" | awk -F. '{print $2}' | base64 -d 2>/dev/null | jq . >/dev/null 2>&1; then
      echo 'Warning: Token payload is not valid JSON when base64 decoded' >&2
    else
      echo 'Token payload appears to be valid JSON'
    fi

  # This relies on the ServiceAccount, Role & RoleBinding set up in k8s (included).
  inject-secret.sh: |
    #!/bin/bash
    # Publish every regular file in a directory as one Kubernetes Secret, using
    # the pod's service-account credentials against the in-cluster API.
    #
    # Usage: inject-secret.sh <dir> <secret-name> [allow-override]
    #   <dir>            directory whose files become the Secret's data keys
    #   <secret-name>    name of the Secret in the pod's own namespace
    #   allow-override   "true" or "1" to replace an existing Secret (default: false)
    set -e

    # Wait for required config files
    MAX_WAIT=300
    WAIT_TIME=0
    CONFIG_FILES=("/hatchet/config/server.yaml" "/hatchet/config/database.yaml" "/hatchet_api_key/api_key.txt")

    while ! [[ -s "${CONFIG_FILES[0]}" && -s "${CONFIG_FILES[1]}" && -s "${CONFIG_FILES[2]}" ]]; do
        (( WAIT_TIME >= MAX_WAIT )) && { echo "Timeout waiting for config files."; exit 1; }
        echo "Waiting for config files to be created and not empty..."; sleep 10; (( WAIT_TIME += 10 ))
    done
    echo "Config files are ready."

    # Kubernetes API variables
    SA_DIR="/var/run/secrets/kubernetes.io/serviceaccount"
    NAMESPACE=$(cat "${SA_DIR}/namespace")
    TOKEN=$(cat "${SA_DIR}/token")
    # Verify the API server against the cluster CA mounted next to the token,
    # so the bearer token is never sent over an unverified TLS connection.
    CA_CERT="${SA_DIR}/ca.crt"
    API_SERVER="https://kubernetes.default.svc:${KUBERNETES_SERVICE_PORT}"

    echo ">>> Processing secret: $2 in folder: $1. ALLOW_OVERRIDE: $3"

    # update_secret <dir> <secret-name> [allow-override]
    # Builds the Secret body from the files in <dir>, then creates the Secret,
    # or replaces it when it already exists and overriding is allowed.
    update_secret() {
      local DIR="$1" SECRET_NAME="$2" ALLOW_OVERRIDE="${3:-false}"
      ALLOW_OVERRIDE=$(echo "$ALLOW_OVERRIDE" | tr '[:upper:]' '[:lower:]')
      local -a key_value_pairs=()

      echo "Processing directory: $DIR"; ls -la "$DIR"

      # Secret `data` values must be base64 without line breaks.
      for f in "$DIR"/*; do
          [[ -f "$f" ]] || continue
          key=$(basename "$f")
          value=$(base64 "$f" | tr -d '\n')
          key_value_pairs+=("\"$key\":\"$value\"")
          echo "Found file: $f, key: $key"
      done

      local json_data=$(printf '{%s}' "$(IFS=, ; echo "${key_value_pairs[*]}")")
      local json_body
      json_body=$(jq -n \
        --arg name "$SECRET_NAME" \
        --arg ns "$NAMESPACE" \
        --arg data "$json_data" \
        '{apiVersion:"v1", kind:"Secret", metadata:{name:$name, namespace:$ns}, data: ($data | fromjson)}')

      #echo "Validated JSON Body: $json_body"

      # Check if the secret exists
      local response
      local response_code
      response_code=$(curl -s -o /dev/null -w "%{http_code}" --cacert "$CA_CERT" --header "Authorization: Bearer ${TOKEN}" \
          "${API_SERVER}/api/v1/namespaces/${NAMESPACE}/secrets/${SECRET_NAME}")

      if [[ "$response_code" == "200" ]]; then
          [[ "$ALLOW_OVERRIDE" == "true" || "$ALLOW_OVERRIDE" == "1" ]] || {
              echo "ALLOW_OVERRIDE is false. Skipping update."; return;
          }
          echo "Updating existing secret: $SECRET_NAME"
          response=$(curl -s -X PUT --cacert "$CA_CERT" --header "Authorization: Bearer ${TOKEN}" --header "Content-Type: application/json" \
              --data "$json_body" "${API_SERVER}/api/v1/namespaces/${NAMESPACE}/secrets/${SECRET_NAME}")
      else
          echo "Creating new secret: $SECRET_NAME"
          response=$(curl -s -X POST --cacert "$CA_CERT" --header "Authorization: Bearer ${TOKEN}" --header  "Content-Type: application/json" \
            --data "$json_body" "${API_SERVER}/api/v1/namespaces/${NAMESPACE}/secrets")
      fi
      # Remove sensitive data before printing: every value within .data becomes "[REDACTED]".
      echo "JSON:"
      echo "$response" | jq '.data |= with_entries(.value="[REDACTED]")'
    }

    update_secret "$1" "$2" "$3"
    echo "Finished processing secret: $2 in folder: $1. ALLOW_OVERRIDE: $3"
    exit 0

  check-service.sh: |
    #!/bin/sh
    # Block until the URL given as the first argument can be fetched with wget,
    # polling every 10 seconds.
    set -e

    until wget -q -O - "${1}" > /dev/null 2>&1; do
        echo "Service is not reachable at ${1}. Retrying in 10 seconds..."
        sleep 10
    done
    echo "Service is reachable at ${1}"

  check-file.sh: |
    #!/bin/sh
    # Block until the path given as the first argument exists and is non-empty,
    # polling every 10 seconds and reporting why each attempt failed.
    set -e

    until [ -s "${1}" ]; do
        if [ -f "${1}" ]; then
            echo "File ${1} exists but is empty."
        else
            echo "File ${1} does not exist."
        fi
        echo "Retrying in 10 seconds..."
        sleep 10
    done
    echo "File ${1} exists and is not empty."
  nginx.conf: |
    events {
        worker_connections 2048;
        use epoll;
        multi_accept on;
    }

    http {
        # Required basic settings
        include       /etc/nginx/mime.types;
        default_type  application/octet-stream;
        client_max_body_size 100M;

        # Logging settings
        log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                          '$status $body_bytes_sent "$http_referer" '
                          '"$http_user_agent" "$http_x_forwarded_for"';
        access_log  /var/log/nginx/access.log  main;

        # Connection optimization
        sendfile        on;
        tcp_nopush      on;
        tcp_nodelay     on;
        keepalive_timeout  65;

        upstream r2r_backend {
            least_conn;
            server r2r:7272 max_fails=3 fail_timeout=30s;  # Use service name instead of container names
            keepalive 32;
        }

        server {
            listen 80;
            server_name localhost;

            # Timeouts
            proxy_connect_timeout 300s;
            proxy_send_timeout 300s;
            proxy_read_timeout 300s;

            # Buffer settings
            proxy_buffers 8 16k;
            proxy_buffer_size 32k;

            location / {
                proxy_pass http://r2r_backend;
                proxy_http_version 1.1;
                proxy_set_header Upgrade $http_upgrade;
                proxy_set_header Connection 'upgrade';
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Proto $scheme;

                # Retry settings
                proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
                proxy_next_upstream_tries 3;
                proxy_next_upstream_timeout 10s;
            }

            # Static health endpoint answered by nginx itself (never proxied).
            location /health {
                access_log off;
                # Set the type with default_type: `return` already emits a
                # Content-Type header, so add_header would send a second one.
                default_type application/json;
                return 200 '{"status":"healthy"}';
            }

            # Error responses
            error_page 500 502 503 504 /50x.html;
            location = /50x.html {
                root /usr/share/nginx/html;
            }
        }
    }


================================================
FILE: deployment/k8s/kustomizations/include/cm-init-scripts-r2r.yaml
================================================
# This file contains the initialization scripts used by the InitContainers in the Job manifests.

apiVersion: v1
kind: ConfigMap
metadata:
  name: r2r-init-scripts
data:

  check-service.sh: |
    #!/bin/sh
    # Block until the URL given as the first argument can be fetched with wget,
    # polling every 10 seconds.
    set -e

    until wget -q -O - "${1}" > /dev/null 2>&1; do
        echo "Service is not reachable at ${1}. Retrying in 10 seconds..."
        sleep 10
    done
    echo "Service is reachable at ${1}"

  check-file.sh: |
    #!/bin/sh
    # Block until the path given as the first argument exists and is non-empty,
    # polling every 10 seconds and reporting why each attempt failed.
    set -e

    until [ -s "${1}" ]; do
        if [ -f "${1}" ]; then
            echo "File ${1} exists but is empty."
        else
            echo "File ${1} does not exist."
        fi
        echo "Retrying in 10 seconds..."
        sleep 10
    done
    echo "File ${1} exists and is not empty."

  nginx.conf: |
    events {
        worker_connections 2048;
        use epoll;
        multi_accept on;
    }

    http {
        # Required basic settings
        include       /etc/nginx/mime.types;
        default_type  application/octet-stream;
        client_max_body_size 100M;

        # Logging settings
        log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                          '$status $body_bytes_sent "$http_referer" '
                          '"$http_user_agent" "$http_x_forwarded_for"';
        access_log  /var/log/nginx/access.log  main;

        # Connection optimization
        sendfile        on;
        tcp_nopush      on;
        tcp_nodelay     on;
        keepalive_timeout  65;

        upstream r2r_backend {
            least_conn;
            server r2r:7272 max_fails=3 fail_timeout=30s;  # Use service name instead of container names
            keepalive 32;
        }

        server {
            listen 80;
            server_name localhost;

            # Timeouts
            proxy_connect_timeout 300s;
            proxy_send_timeout 300s;
            proxy_read_timeout 300s;

            # Buffer settings
            proxy_buffers 8 16k;
            proxy_buffer_size 32k;

            location / {
                proxy_pass http://r2r_backend;
                proxy_http_version 1.1;
                proxy_set_header Upgrade $http_upgrade;
                proxy_set_header Connection 'upgrade';
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Proto $scheme;

                # Retry settings
                proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
                proxy_next_upstream_tries 3;
                proxy_next_upstream_timeout 10s;
            }

            # Static health endpoint answered by nginx itself (never proxied).
            location /health {
                access_log off;
                # Set the type with default_type: `return` already emits a
                # Content-Type header, so add_header would send a second one.
                default_type application/json;
                return 200 '{"status":"healthy"}';
            }

            # Error responses
            error_page 500 502 503 504 /50x.html;
            location = /50x.html {
                root /usr/share/nginx/html;
            }
        }
    }


================================================
FILE: deployment/k8s/kustomizations/include/cm-r2r.yaml
================================================
# r2r-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: r2r-configmap
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
data:
#  POSTGRES_HOST: "postgres"
  R2R_POSTGRES_HOST: "r2r-documentdb"
  R2R_POSTGRES_PORT: "5432"
#  POSTGRES_PORT: "5432"
  R2R_POSTGRES_DBNAME: "r2r"
  R2R_PROJECT_NAME: "r2r_default"
  R2R_HOST: "0.0.0.0"
  R2R_PORT: "7272"
  R2R_LOG_LEVEL: INFO

  PYTHONUNBUFFERED: "1"
  R2R_CONFIG_NAME: "full"
#  R2R_CONFIG_PATH: "/app/r2r.toml"
#  R2R_CONFIG_TOML: "/app/r2r.toml"
  TELEMETRY_ENABLED: "false"
  R2R_POSTGRES_PROJECT_NAME: "r2r_default"
  R2R_POSTGRES_MAX_CONNECTIONS: "1024"
  R2R_POSTGRES_STATEMENT_CACHE_SIZE: "100"
  NEXT_PUBLIC_R2R_DEPLOYMENT_URL: "http://r2r:7272"
  NEXT_PUBLIC_HATCHET_DASHBOARD_URL: "http://hatchet-dashboard:80"
  R2R_DASHBOARD_PORT: "3000"
  R2R_NGINX_PORT: "80"
  R2R_HATCHET_DASHBOARD_PORT: "80"

  PGADMIN_ENABLE_TLS: "false"


  # API Base URLs
  OPENAI_API_BASE: "https://litellm.mywebsite.com/v1"
  LITELLM_PROXY_API_BASE: "https://litellm.mywebsite.com/v1"
  LITELLM_PROXY_API_URL: "https://litellm.mywebsite.com/v1"
  HUGGINGFACE_API_BASE: "https://hf-tei.mywebsite.com"


  AZURE_FOUNDRY_API_ENDPOINT: ""
  AZURE_API_BASE: ""
  AZURE_API_VERSION: ""
  VERTEX_PROJECT: ""
  VERTEX_LOCATION: ""
  AWS_REGION_NAME: ""
  OLLAMA_API_BASE: ""
#  OLLAMA_API_BASE: "http://host.docker.internal:11434"
  LM_STUDIO_API_BASE: ""

  CLUSTERING_SERVICE_URL: "http://r2r-graph-clustering:7276"    # Graphologic

  R2R_SENTRY_DSN: ""
  R2R_SENTRY_ENVIRONMENT: ""
  R2R_SENTRY_TRACES_SAMPLE_RATE: ""
  R2R_SENTRY_PROFILES_SAMPLE_RATE: ""
  GOOGLE_REDIRECT_URI: ""
  GITHUB_REDIRECT_URI: ""


================================================
FILE: deployment/k8s/kustomizations/include/cm-unstructured.yaml
================================================
---
# unstructured-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: unstructured-configmap
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
data:
  UNSTRUCTURED_SERVICE_URL: "http://unstructured:7275"
  UNSTRUCTURED_NUM_WORKERS: "10"
  UNSTRUCTURED_API_URL: "https://api.unstructured.io/general/v0/general"


================================================
FILE: deployment/k8s/kustomizations/include/hatchet-dashboard-initc.yaml
================================================
---
apiVersion: v1
kind: Service
metadata:
  name: hatchet-dashboard
spec:
  selector:
    app: hatchet-dashboard
  ports:
    - port: 80
      targetPort: 80
  type: ClusterIP
---
# Deployment for the Hatchet dashboard: a single replica listening on port 80.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hatchet-dashboard
  annotations:
    argocd.argoproj.io/sync-wave: "30"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hatchet-dashboard
  template:
    metadata:
      labels:
        app: hatchet-dashboard
    spec:
      # The init container below is disabled. NOTE(review): it mounts a
      # `config-volume` that is not declared under `volumes` in this manifest,
      # so re-enabling it as-is would need that volume added.
#      initContainers:
#      - name: wait-for-config-files
#        image: busybox:1.37.0
#        command:
#          - /bin/sh
#          - -c
#          - |
#            # Wait for config files to be generated by hatchet-init-job and pushed into Secret and be not empty.
#            sh /init/check-file.sh /hatchet/config/server.yaml
#            sh /init/check-file.sh /hatchet/config/database.yaml
#            echo "Config files are ready."
#        volumeMounts:
#        - mountPath: /init
#          name: init-scripts
#        - name: config-volume
#          mountPath: /hatchet/config
      containers:
      - name: hatchet-dashboard
        image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.54.4
        command: ["sh", "./entrypoint.sh", "--config", "/hatchet/config"]
        ports:
          - containerPort: 80
        env:
          # DATABASE_URL is set explicitly from `hatchet-shared-config`; the same
          # Secret is also loaded in full through envFrom below.
          - name: DATABASE_URL
            valueFrom:
              secretKeyRef:
                name: hatchet-shared-config
                key: DATABASE_URL
        envFrom:
        - secretRef:
            name: hatchet-config
        - secretRef:
            name: hatchet-shared-config

      # `init-scripts` is declared here, but only the commented-out init
      # container above mounts it. defaultMode 493 (decimal) is 0755 in octal.
      volumes:
      - configMap:
          defaultMode: 493
          name: hatchet-init-scripts
        name: init-scripts


================================================
FILE: deployment/k8s/kustomizations/include/hatchet-engine-initc.yaml
================================================
---
apiVersion: v1
kind: Service
metadata:
  name: hatchet-engine
spec:
  selector:
    app: hatchet-engine
  ports:
    - port: 7077
      targetPort: 7077
  type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hatchet-engine
  annotations:
    argocd.argoproj.io/sync-wave: "30"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hatchet-engine
  template:
    metadata:
      labels:
        app: hatchet-engine
    spec:
      initContainers:
      - name: wait-for-config-files
        image: busybox:1.37.0
        command:
          - /bin/sh
          - -c
          - |
            # Wait for config files to be generated by hatchet-init-job and pushed into Secret and be not empty.
            sh /init/check-file.sh /hatchet/config/server.yaml
            sh /init/check-file.sh /hatchet/config/database.yaml
            echo "Config files are ready."
        volumeMounts:
        - mountPath: /init
          name: init-scripts
        - name: config-volume
          mountPath: /hatchet/config
      containers:
      - name: hatchet-engine
        image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.54.4
        command: ["/hatchet/hatchet-engine", "--config", "/hatchet/config"]
        ports:
          - containerPort: 7077
        envFrom:
          - secretRef:
              name: hatchet-secrets
          - configMapRef:
              name: hatchet-configmap
        livenessProbe:
          exec:
            command: ["wget", "-q", "-O", "-", "http://localhost:8733/live"]
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 5
        # Readiness queries the engine's /ready endpoint on the health port (8733).
        # Probing /live here would only repeat the liveness check and could mark
        # the pod ready before the engine is able to serve traffic.
        readinessProbe:
          exec:
            command: ["wget", "-q", "-O", "-", "http://localhost:8733/ready"]
          initialDelaySeconds: 5
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        volumeMounts:
          - name: certs-volume
            mountPath: /hatchet/certs
          - name: config-volume
            mountPath: /hatchet/config
      volumes:
      - configMap:
          defaultMode: 493
          name: hatchet-init-scripts
        name: init-scripts
      - name: certs-volume
        secret:
          secretName: r2r-hatchet-gen-cert-files
      - name: config-volume
        secret:
          secretName: r2r-hatchet-gen-conf-files


================================================
FILE: deployment/k8s/kustomizations/include/hatchet-init-job.yaml
================================================
# One-shot bootstrap Job for Hatchet.
# The init containers run sequentially: i01 runs create-db.sh, i02 runs the
# hatchet-migrate image, i03 generates config / token / certificates and pushes
# them into Kubernetes Secrets via inject-secret.sh. The main container only
# prints a completion message after every init container has succeeded.
apiVersion: batch/v1
kind: Job
metadata:
  #generate a unique name for the job
  #generateName: hatchet-init-job-
  name: hatchet-init-job
spec:
  template:
    spec:
      restartPolicy: Never
      # ServiceAccount used by this pod (needed by inject-secret.sh to write Secrets).
      serviceAccountName: hatchet-job-sa

      containers:
      - name: minimal-job-container
        image: busybox:1.37.0
        # The message has to be part of the single `-c` script string. With
        # ["sh", "-c", "echo", "msg"] the message becomes $0 of the script and
        # `echo` prints only an empty line.
        command: ["sh", "-c", "echo 'All init Jobs are completed'"]

      initContainers:

      # Step 1: run create-db.sh with the database settings from hatchet-shared-config.
      - name: i01-hatchet-create-db
        image: postgres:17.2-alpine3.21
        envFrom:
        #DATABASE_URL
        #DATABASE_POSTGRES_HOST
        #DATABASE_POSTGRES_PORT
        #DATABASE_POSTGRES_USERNAME
        #DATABASE_POSTGRES_PASSWORD
        #DATABASE_POSTGRES_DB_NAME
        - secretRef:
            name: hatchet-shared-config
        volumeMounts:
        # Only the single script is mounted (subPath), not the whole ConfigMap.
        - mountPath: /init/create-db.sh
          name: init-scripts
          subPath: create-db.sh
        command: ["/bin/sh"]
        args:
        - -c
        - |
          sh /init/create-db.sh || exit 1
          echo "Job completed successfully: Database created"
          exit 0

      # Step 2: run the hatchet-migrate image with its default entrypoint.
      - name: i02-hatchet-migration
        image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.54.4
        envFrom:
        #DATABASE_URL
        - secretRef:
            name: hatchet-shared-config

      # Step 3: generate config, token and certificates, then store them as Secrets.
      - name: i03-hatchet-setup
        image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.54.4
        envFrom:
        #DATABASE_URL
        #DATABASE_POSTGRES_PORT
        #DATABASE_POSTGRES_HOST
        #DATABASE_POSTGRES_USERNAME
        #DATABASE_POSTGRES_PASSWORD
        #DATABASE_POSTGRES_DB_NAME
        #SERVER_TASKQUEUE_RABBITMQ_URL
        #SERVER_AUTH_COOKIE_DOMAIN
        #SERVER_URL
        #SERVER_AUTH_COOKIE_INSECURE
        #SERVER_GRPC_BIND_ADDRESS
        #SERVER_GRPC_INSECURE
        #SERVER_GRPC_BROADCAST_ADDRESS
        #SERVER_GRPC_MAX_MSG_SIZE
        - secretRef:
            name: hatchet-shared-config
        #HATCHET_CLIENT_TLS_STRATEGY
        #HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF
        #HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_APIKEY
        #HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CERT
        #HATCHET_TENANT_ID
        #HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH
        #HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH
        #RABBITMQ_URL
        #RABBITMQ_MGMT_PORT
        - configMapRef:
            name: hatchet-configmap
        command: ["/bin/bash"]
        args:
        - -c
        - |
          apk add -q --no-interactive curl jq
          # Wait for the volumes to be mounted and files to be present
          sleep 5

          # Wait for RabbitMQ to be ready. Check if management port is open.
          sh /init/check-service.sh ${RABBITMQ_URL:-http://hatchet-rabbitmq}:${RABBITMQ_MGMT_PORT:-15672}

          #in case the secrets do not exists, create the directories
          echo "Preparing /hatchet_api_key and /hatchet/config directories..."
          mkdir -p /hatchet_api_key-cm /hatchet/certs-cm /hatchet/config-cm
          mkdir -p /hatchet_api_key /hatchet/certs /hatchet/config
          cp -r /hatchet_api_key-cm/. /hatchet_api_key/
          cp -r /hatchet/certs-cm/. /hatchet/certs/
          cp -r /hatchet/config-cm/. /hatchet/config/
          #chmod 666 -R /hatchet_api_key
          #chmod 666 -R /hatchet/certs
          #chmod 666 -R /hatchet/config

          #Generate Config
          bash /init/setup-config.sh  || exit 1
          echo "Job completed successfully: Config created."

          #Generate Token
          bash /init/setup-token.sh   || exit 1
          echo "Job completed successfully: Token created."

          #Push Config and Token into k8s Secrets
          bash /init/inject-secret.sh "/hatchet_api_key" "r2r-hatchet-gen-conf-api" "${HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_APIKEY:-false}" || exit 1
          echo "Job completed successfully: Token file is processed for k8s Secrets."

          bash /init/inject-secret.sh "/hatchet/config" "r2r-hatchet-gen-conf-files" "${HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CONF:-false}"  || exit 1
          echo "Job completed successfully: Config files are processed for k8s Secrets."

          #Push Certificates into k8s Secrets
          if [ "${HATCHET_CLIENT_TLS_STRATEGY}" = "none" ]; then
            echo ">>> HATCHET_CLIENT_TLS_STRATEGY is set to none, skipping certificate processing for k8s Secrets."
          else
            bash /init/inject-secret.sh "/hatchet/certs" "r2r-hatchet-gen-cert-files"  "${HATCHET_ADMIN_INIT_ALLOW_OVERRIDE_CERT:-false}" || exit 1
            echo "Job completed successfully: Certificate files are processed for k8s Secrets."
          fi

          exit 0
        volumeMounts:
        - name: init-scripts
          mountPath: /init

        # Existing Secrets (if any) are mounted under "*-cm" paths; the script
        # copies their contents into the working directories of the same base name.
        - name: hatchet-api-key
          mountPath: /hatchet_api_key-cm
        - name: certs-volume
          mountPath: /hatchet/certs-cm
        - name: config-volume
          mountPath: /hatchet/config-cm

      volumes:
      - name: init-scripts
        configMap:
          defaultMode: 0755
          name: hatchet-init-scripts
      # The three Secrets are optional so that the first run, before they exist,
      # can still start the pod.
      - name: hatchet-api-key
        secret:
          defaultMode: 0644
          secretName: r2r-hatchet-gen-conf-api
          optional: true
      - name: certs-volume
        secret:
          #stat -c "%a %n" *
          defaultMode: 0644
          secretName: r2r-hatchet-gen-cert-files
          optional: true
      - name: config-volume
        secret:
          defaultMode: 0644
          secretName: r2r-hatchet-gen-conf-files
          optional: true
---
# Pod identity referenced by hatchet-init-job (serviceAccountName).
kind: ServiceAccount
apiVersion: v1
metadata:
  name: hatchet-job-sa
---
# Role that allows get/update/patch on the three generated Hatchet Secrets
# (selected by name) and creation of Secrets.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: hatchet-secret-writer
rules:
  # Read and modify: restricted to the named Secrets.
  - apiGroups:
      - ""
    resources:
      - secrets
    resourceNames:
      - r2r-hatchet-gen-conf-api
      - r2r-hatchet-gen-conf-files
      - r2r-hatchet-gen-cert-files
    verbs:
      - update
      - patch
      - get
  # Disabled: delete on the named Secrets.
#  - apiGroups: [""]
#    resources: ["secrets"]
#    verbs: ["delete"]
#    resourceNames: ["r2r-hatchet-gen-conf-api", "r2r-hatchet-gen-conf-files", "r2r-hatchet-gen-cert-files"]
  # Create: granted without a resourceNames restriction.
  - apiGroups:
      - ""
    resources:
      - secrets
    verbs:
      - create
  # Disabled: watch / list on Secrets.
#  - apiGroups: [""]
#    resources: ["secrets"]
#    verbs: ["watch", "list"]
---
# Binds the hatchet-secret-writer Role to the hatchet-job-sa ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: hatchet-secret-writer-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: hatchet-secret-writer
subjects:
- name: hatchet-job-sa
  kind: ServiceAccount


================================================
FILE: deployment/k8s/kustomizations/include/hatchet-rabbitmq-sts.yaml
================================================
---
# Single-replica RabbitMQ StatefulSet with a 5Gi persistent data volume.
# Credentials are read from the hatchet-secrets Secret; an extra config file
# is projected from the hatchet-configmap ConfigMap.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hatchet-rabbitmq
spec:
  replicas: 1
  serviceName: hatchet-rabbitmq
  selector:
    matchLabels:
      app: hatchet-rabbitmq
  volumeClaimTemplates:
    - metadata:
        name: rabbitmq-data
      spec:
        storageClassName: csi-sc
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 5Gi
  template:
    metadata:
      labels:
        app: hatchet-rabbitmq
    spec:
      # Fixed pod hostname.
      hostname: hatchet-rabbitmq
      volumes:
        - name: rabbitmq-my-conf
          configMap:
            name: hatchet-configmap
      containers:
        - name: hatchet-rabbitmq
          image: rabbitmq:3.13.7-management-alpine
          ports:
            - name: amqp
              containerPort: 5672
            - name: management
              containerPort: 15672
          env:
            - name: RABBITMQ_DEFAULT_USER
              valueFrom:
                secretKeyRef:
                  key: RABBITMQ_DEFAULT_USER
                  name: hatchet-secrets
            - name: RABBITMQ_DEFAULT_PASS
              valueFrom:
                secretKeyRef:
                  key: RABBITMQ_DEFAULT_PASS
                  name: hatchet-secrets
          volumeMounts:
            # Broker data directory, backed by the volume claim template.
            - mountPath: /var/lib/rabbitmq
              name: rabbitmq-data
            # Single file projected from the ConfigMap key myrabbitmq.conf.
            - mountPath: /etc/rabbitmq/conf.d/myrabbitmq.conf
              subPath: myrabbitmq.conf
              name: rabbitmq-my-conf
          livenessProbe:
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 10
            failureThreshold: 5
            exec:
              command:
                - rabbitmqctl
                - status
---
# Headless Service (clusterIP: None) for the RabbitMQ pods: AMQP and management ports.
apiVersion: v1
kind: Service
metadata:
  name: hatchet-rabbitmq
spec:
  clusterIP: None
  ports:
    - name: amqp
      port: 5672
      targetPort: 5672
    - name: management
      port: 15672
      targetPort: 15672
  selector:
    app: hatchet-rabbitmq


================================================
FILE: deployment/k8s/kustomizations/include/pgadmin.yaml
================================================
# Single-replica pgAdmin 4 Deployment; login e-mail and password come from
# the pgadmin-secrets Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pgadmin
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pgadmin
  template:
    metadata:
      labels:
        app: pgadmin
    spec:
      containers:
        - name: pgadmin
          image: dpage/pgadmin4:8.14.0
          env:
            - name: PGADMIN_DEFAULT_EMAIL
              valueFrom:
                secretKeyRef:
                  key: PGADMIN_DEFAULT_EMAIL
                  name: pgadmin-secrets
            - name: PGADMIN_DEFAULT_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: PGADMIN_DEFAULT_PASSWORD
                  name: pgadmin-secrets
          ports:
            - containerPort: 80
---
# NodePort Service exposing pgAdmin on port 80.
apiVersion: v1
kind: Service
metadata:
  name: pgadmin
spec:
  type: NodePort
  ports:
    - targetPort: 80
      port: 80
  selector:
    app: pgadmin


================================================
FILE: deployment/k8s/kustomizations/include/pgvector-sts.yaml
================================================
---
# Single-replica PostgreSQL + pgvector StatefulSet with a 5Gi data volume.
# Credentials come from r2r-secrets, port settings from r2r-configmap.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: r2r-pgvector
spec:
  serviceName: "r2r-pgvector"
  replicas: 1
  selector:
    matchLabels:
      app: r2r-pgvector
  template:
    metadata:
      labels:
        app: r2r-pgvector
    spec:
      # Run the container as the non-root "postgres" user (UID 999) to prevent running as root.
      securityContext:
        runAsUser: 999
        fsGroup: 999
      containers:
      - name: r2r-pgvector
        image: pgvector/pgvector:0.8.0-pg17
        # Use `args` (the image CMD), not `command` (the image ENTRYPOINT).
        # Overriding the entrypoint skips the image's initialisation script, so the
        # data directory is never initialised and POSTGRES_USER / POSTGRES_PASSWORD
        # are ignored. With `args` the entrypoint still runs and receives these
        # server flags.
        args:
          - postgres
          - -c
          - "max_connections=1024"
        env:
          - name: POSTGRES_USER
            valueFrom:
              secretKeyRef:
                name: r2r-secrets
                key: R2R_POSTGRES_USER
          - name: POSTGRES_PASSWORD
            valueFrom:
              secretKeyRef:
                name: r2r-secrets
                key: R2R_POSTGRES_PASSWORD
#          - name: POSTGRES_HOST
#            valueFrom:
#              configMapKeyRef:
#                name: r2r-configmap
#                key: R2R_POSTGRES_HOST
          - name: POSTGRES_PORT
            valueFrom:
              configMapKeyRef:
                name: r2r-configmap
                key: R2R_POSTGRES_PORT
          # NOTE(review): max_connections above is hard-coded to 1024; this variable
          # is exported but not referenced by the start arguments.
          - name: POSTGRES_MAX_CONNECTIONS
            valueFrom:
              configMapKeyRef:
                name: r2r-configmap
                key: R2R_POSTGRES_MAX_CONNECTIONS
          - name: PGPORT
            valueFrom:
              configMapKeyRef:
                name: r2r-configmap
                key: R2R_POSTGRES_PORT
        ports:
          - containerPort: 5432
            name: r2r-pgvector
        volumeMounts:
          - name: postgres-data
            mountPath: /var/lib/postgresql/data
        #livenessProbe:
        #  exec:
        #    command:
        #      - "pg_isready"
        #      - "-U"
        #      - "${POSTGRES_USER}"
        #  initialDelaySeconds: 10
        #  timeoutSeconds: 5
        #  periodSeconds: 10
        #  failureThreshold: 5
  volumeClaimTemplates:
  - metadata:
      name: postgres-data
    spec:
      accessModes:
        - ReadWriteOnce
      storageClassName: csi-sc
      resources:
        requests:
          storage: 5Gi
---
# filepath: /manifests/postgres-service.yaml
# Headless Service (clusterIP: None) for the r2r-pgvector pods on port 5432.
apiVersion: v1
kind: Service
metadata:
  name: r2r-pgvector
spec:
  clusterIP: None
  ports:
  - name: r2r-pgvector
    targetPort: 5432
    port: 5432
  selector:
    app: r2r-pgvector


================================================
FILE: deployment/k8s/kustomizations/include/r2r-dashboard-indep.yaml
================================================
---
# Single-replica R2R dashboard Deployment (container port 3000).
# Both NEXT_PUBLIC_* URLs are read from the r2r-configmap ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: r2r-dashboard
spec:
  replicas: 1
  selector:
    matchLabels:
      app: r2r-dashboard
  template:
    metadata:
      labels:
        app: r2r-dashboard
    spec:
      containers:
        - name: r2r-dashboard
          image: emrgntcmplxty/r2r-dashboard:1.0.1
          env:
            - name: NEXT_PUBLIC_R2R_DEPLOYMENT_URL
              valueFrom:
                configMapKeyRef:
                  key: NEXT_PUBLIC_R2R_DEPLOYMENT_URL
                  name: r2r-configmap
            - name: NEXT_PUBLIC_HATCHET_DASHBOARD_URL
              valueFrom:
                configMapKeyRef:
                  key: NEXT_PUBLIC_HATCHET_DASHBOARD_URL
                  name: r2r-configmap
          ports:
            - containerPort: 3000
          # No probes are configured. Template to enable them if needed:
          # livenessProbe:
          #   httpGet:
          #     path: /live
          #     port: 3000
          #   initialDelaySeconds: 10
          #   periodSeconds: 10
          # readinessProbe:
          #   httpGet:
          #     path: /ready
          #     port: 3000
          #   initialDelaySeconds: 5
          #   periodSeconds: 10
---
# Cluster-internal Service for the dashboard, port 3000 -> container port 3000.
# (docker-compose publishes the dashboard externally via ${R2R_DASHBOARD_PORT:-7273}.)
apiVersion: v1
kind: Service
metadata:
  name: r2r-dashboard
spec:
  type: ClusterIP
  ports:
    - targetPort: 3000   # container port, as set in docker-compose
      port: 3000         # Service port
  selector:
    app: r2r-dashboard


================================================
FILE: deployment/k8s/kustomizations/include/r2r-graph-clustering-indep.yaml
================================================
---
# Single-replica graph-clustering service (container port 7276) with a
# curl-based liveness check against its /health endpoint.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: r2r-graph-clustering
spec:
  replicas: 1
  selector:
    matchLabels:
      app: r2r-graph-clustering
  template:
    metadata:
      labels:
        app: r2r-graph-clustering
    spec:
      containers:
        - name: r2r-graph-clustering
          image: ragtoriches/cluster-prod:latest
          ports:
            - containerPort: 7276
          livenessProbe:
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 5
            exec:
              command:
                - curl
                - -f
                - http://localhost:7276/health
---
# NodePort Service exposing the graph-clustering pods on port 7276.
apiVersion: v1
kind: Service
metadata:
  name: r2r-graph-clustering
spec:
  type: NodePort
  ports:
    - targetPort: 7276
      port: 7276
  selector:
    app: r2r-graph-clustering


================================================
FILE: deployment/k8s/kustomizations/include/r2r-initc.yaml
================================================
---
# Deployment of the R2R API server (container port 7272).
# An init container first checks the mounted r2r.toml and the health endpoints of
# the Unstructured and Graph-Clustering services; the main container then starts
# uvicorn with environment taken from ConfigMaps and Secrets.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: r2r
  annotations:
    argocd.argoproj.io/sync-wave: "30"

spec:
  replicas: 1
  selector:
    matchLabels:
      app: r2r
  template:
    metadata:
      labels:
        app: r2r
    spec:
      initContainers:
      # Gate container: uses check-file.sh / check-service.sh from the
      # r2r-init-scripts ConfigMap (mounted at /init).
      - name: wait-for-configs-and-services
        image: busybox:1.37.0
        command:
          - /bin/sh
          - -c
          - |
            # Wait for /app/r2r.toml and /hatchet_api_key/api_key.txt to exist and be not empty.
            sh /init/check-file.sh /app/r2r.toml
            echo "Config file is ready."
            #sh /init/check-file.sh /hatchet_api_key/api_key.txt
            #echo "API key is ready."

            UNSTRUCTURED_HEALTH_URL=${UNSTRUCTURED_SERVICE_URL:-http://unstructured:7275}"/health"
            echo "Checking health of the Unstructured service at: ${UNSTRUCTURED_HEALTH_URL}..."
            sh /init/check-service.sh $UNSTRUCTURED_HEALTH_URL

            GRAPHCLUSTER_HEALTH_URL=${CLUSTERING_SERVICE_URL:-http://r2r-graph-clustering:7276}"/health"
            echo "Checking health of the Graph-Clustering service at: ${GRAPHCLUSTER_HEALTH_URL}..."
            sh /init/check-service.sh $GRAPHCLUSTER_HEALTH_URL

        # Service base URLs consumed by the script above (it falls back to the
        # in-cluster defaults when a variable is empty).
        env:
          - name: CLUSTERING_SERVICE_URL
            valueFrom:
              configMapKeyRef:
                name: r2r-configmap
                key: CLUSTERING_SERVICE_URL
          - name: UNSTRUCTURED_SERVICE_URL
            valueFrom:
              configMapKeyRef:
                name: unstructured-configmap
                key: UNSTRUCTURED_SERVICE_URL
        volumeMounts:
        - mountPath: /init
          name: init-scripts
#        - name: hatchet-api-key
#          mountPath: /hatchet_api_key
#          readOnly: true
        - name: r2r-toml
          mountPath: /app/r2r.toml
          subPath: r2r.toml
          readOnly: true
      containers:
      - name: r2r
        image: "ragtoriches/prod:3.3.32"
        # Start-up wrapper: waits 10s, falls back to reading the Hatchet token from
        # /hatchet_api_key/api_key.txt when HATCHET_CLIENT_TOKEN is empty, then
        # replaces the shell with uvicorn.
        # NOTE(review): the hatchet-api-key volume is commented out below, so the
        # fallback file is not mounted by this manifest - confirm this is intended.
        command:
          - sh
          - -c
          - |
            #!/bin/sh
            sleep 10
            if [ -z "${HATCHET_CLIENT_TOKEN}" ]; then
              export HATCHET_CLIENT_TOKEN=$(cat /hatchet_api_key/api_key.txt)
            fi
            exec uvicorn core.main.app_entry:app --host ${R2R_HOST} --port ${R2R_PORT}
        ports:
          - containerPort: 7272
        # Bulk environment; R2R_HOST / R2R_PORT used by the command above are
        # presumably provided by one of these sources - verify.
        envFrom:
          - configMapRef:
              name: unstructured-configmap
          - configMapRef:
              name: r2r-configmap
          - secretRef:
              name: r2r-secrets
        env:
          # Optional: the pod still starts when the Secret or key is missing.
          - name: HATCHET_CLIENT_TOKEN
            valueFrom:
              secretKeyRef:
                name: hatchet-client-config
                key: HATCHET_CLIENT_TOKEN
                optional: true
          - name: HATCHET_CLIENT_TLS_STRATEGY
            valueFrom:
              configMapKeyRef:
                name: hatchet-configmap
                key: HATCHET_CLIENT_TLS_STRATEGY
          - name: HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH
            valueFrom:
              configMapKeyRef:
                name: hatchet-configmap
                key: HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH
          - name: HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH
            valueFrom:
              configMapKeyRef:
                name: hatchet-configmap
                key: HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH
        # Liveness probe is currently disabled.
        #livenessProbe:
        #  httpGet:
        #    path: /v3/health
        #    port: 7272
        #  initialDelaySeconds: 60
        #  periodSeconds: 10
        #  timeoutSeconds: 5
        #  failureThreshold: 5
        volumeMounts:
#        - name: hatchet-api-key
#          mountPath: /hatchet_api_key
#          subPath: api_key.txt
#          readOnly: true
        # r2r.toml is projected as a single read-only file from the r2r-files Secret.
        - name: r2r-toml
          mountPath: /app/r2r.toml
          subPath: r2r.toml
          readOnly: true
      volumes:
      # Helper scripts; 493 decimal == 0755 octal.
      - configMap:
          defaultMode: 493
          name: r2r-init-scripts
        name: init-scripts
      - name: r2r-toml
        secret:
          defaultMode: 0455
          items:
          - key: r2r.toml
            path: r2r.toml
          secretName: r2r-files
#      - name: hatchet-api-key
#        secret:
#          defaultMode: 0755
#          items:
#          - key: HATCHET_CLIENT_TOKEN
#            path: api_key.txt
#          secretName: hatchet-client-config
---
# filepath: /manifests/r2r-service.yaml
# Cluster-internal Service for the R2R API pods on port 7272.
apiVersion: v1
kind: Service
metadata:
  name: r2r
spec:
  type: ClusterIP
  ports:
  - targetPort: 7272
    port: 7272
  selector:
    app: r2r


================================================
FILE: deployment/k8s/kustomizations/include/r2r-nginx-indep.yaml
================================================
---
# Single-replica nginx Deployment (container port 80).
# nginx.conf is projected as a single file from the r2r-init-scripts ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: r2r-nginx
spec:
  replicas: 1
  selector:
    matchLabels:
      app: r2r-nginx
  template:
    metadata:
      labels:
        app: r2r-nginx
    spec:
      containers:
      - name: r2r-nginx
        image: nginx:1.27.3-alpine3.20-slim
        ports:
        - containerPort: 80
        volumeMounts:
        - name: nginx-conf-volume
          mountPath: /etc/nginx/nginx.conf
          subPath: nginx.conf
        # The probe is issued by the kubelet (httpGet) instead of exec'ing `curl`
        # inside the container: the alpine-slim nginx image does not ship curl, so
        # an exec probe would always fail and restart the pod.
        livenessProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        resources:
          limits:
            cpu: "0.5"
            memory: "512Mi"
      volumes:
      - name: nginx-conf-volume
        configMap:
          name: r2r-init-scripts
---
# NodePort Service exposing the nginx pods on port 80.
apiVersion: v1
kind: Service
metadata:
  name: r2r-nginx
spec:
  type: NodePort
  ports:
    - targetPort: 80
      port: 80
  selector:
    app: r2r-nginx


================================================
FILE: deployment/k8s/kustomizations/include/unstructured-indep.yaml
================================================
---
# Single-replica Unstructured service (container port 7275); environment is
# loaded from unstructured-configmap and liveness is a curl check on /health.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: unstructured
spec:
  replicas: 1
  selector:
    matchLabels:
      app: unstructured
  template:
    metadata:
      labels:
        app: unstructured
    spec:
      containers:
        - name: unstructured
          image: ragtoriches/unst-prod
          ports:
            - containerPort: 7275
          envFrom:
            - configMapRef:
                name: unstructured-configmap
          livenessProbe:
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 5
            exec:
              command:
                - curl
                - -f
                - http://localhost:7275/health
---
# NodePort Service exposing the Unstructured pods on port 7275.
apiVersion: v1
kind: Service
metadata:
  name: unstructured
spec:
  type: NodePort
  ports:
    - targetPort: 7275
      port: 7275
  selector:
    app: unstructured


================================================
FILE: deployment/k8s/kustomizations/kustomization.yaml
================================================
# Kustomization for the R2R stack in the ai-system namespace.
# Combines the local manifests under include/ with the hatchet-ha and postgresql
# Helm charts, pins image tags, and applies the patches under patches/.
#
# Render with:
# kustomize build deployment/k8s/kustomizations --enable-helm > deployment/k8s/kustomizations/r2r.kustimized.yaml

apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai-system

# Image overrides. An entry here replaces the tag (and, with newName, the image)
# of every matching image reference in the rendered resources.
images:
#    #https://hub.docker.com/r/dpage/pgadmin4/tags
#  - name: dpage/pgadmin4
#    newTag: 8.14.0
#    #https://hub.docker.com/_/alpine/tags?name=3.2
#  - name: alpine
#    newTag: 3.21.2
    #https://hub.docker.com/_/busybox/tags?name=1.3
  - name: busybox
    newTag: 1.37.0
    #https://hub.docker.com/_/nginx/tags?name=1.27
  - name: nginx
    newTag: 1.27.3-alpine3.20-slim

    #https://github.com/SciPhi-AI/R2R-Dashboard/blob/main/Dockerfile
    #https://hub.docker.com/r/emrgntcmplxty/r2r-dashboard/tags
    # NOTE(review): include/r2r-dashboard-indep.yaml references tag 1.0.1; this
    # override renders 1.0.0 instead - confirm which one is intended.
  - name: emrgntcmplxty/r2r-dashboard
    newTag: 1.0.0
    #https://hub.docker.com/r/ragtoriches/prod/tags?name=3.
  - name: ragtoriches/prod
    newTag: 3.4.0
    #https://hub.docker.com/r/ragtoriches/cluster-prod/tags
  - name: ragtoriches/cluster-prod
    newTag: latest
    #https://github.com/SciPhi-AI/R2R/tree/main/services/unstructured
    #https://hub.docker.com/r/ragtoriches/unst-prod/tags
  - name: ragtoriches/unst-prod
    newTag: latest

    # All Hatchet images are pinned to the same release.
    #ghcr.io/hatchet-dev/hatchet/hatchet-dashboard
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard
    newTag: v0.54.7
    #ghcr.io/hatchet-dev/hatchet/hatchet-engine
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-engine
    newTag: v0.54.7
    #ghcr.io/hatchet-dev/hatchet/hatchet-admin
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-admin
    newTag: v0.54.7
    #ghcr.io/hatchet-dev/hatchet/hatchet-migrate
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-migrate
    newTag: v0.54.7
    #ghcr.io/hatchet-dev/hatchet/hatchet-api
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-api
    newTag: v0.54.7
    #ghcr.io/hatchet-dev/hatchet/hatchet-frontend
  - name: ghcr.io/hatchet-dev/hatchet/hatchet-frontend
    newTag: v0.54.7

    #https://hub.docker.com/r/bitnami/rabbitmq/tags?name=3.
  - name: docker.io/bitnami/rabbitmq
    newTag: 3.12.14-debian-12-r7

    # Every reference to the plain `postgres` image is rewritten to
    # pgvector/pgvector:0.8.0-pg16 (note: PostgreSQL 16, not 17).
    #https://hub.docker.com/_/postgres/tags?name=17.
  - name: postgres
    newTag: 0.8.0-pg16
    newName: pgvector/pgvector
    #https://hub.docker.com/r/pgvector/pgvector/tags?name=pg17
#  - name: pgvector/pgvector
#    newTag: 0.8.0-pg17

# Local manifests. The three entries commented out at the end are not rendered.
resources:
  - include/cm-hatchet.yaml
  - include/cm-r2r.yaml
  - include/cm-unstructured.yaml
  - include/cm-init-scripts-r2r.yaml
  - include/cm-init-scripts-hatchet.yaml

  - include/r2r-dashboard-indep.yaml
  - include/r2r-graph-clustering-indep.yaml
  - include/r2r-nginx-indep.yaml
  - include/unstructured-indep.yaml

  - include/r2r-initc.yaml
  - include/hatchet-dashboard-initc.yaml
#  - include/pgvector-sts.yaml
#  - include/pgadmin.yaml
#  - include/hatchet-init-job.yaml

# Helm charts inflated at build time (requires --enable-helm).
helmCharts:
  - name: hatchet-ha
    #helm repo add hatchet https://hatchet-dev.github.io/hatchet-charts
    #helm repo update hatchet
    #helm search repo hatchet/hatchet-ha

    repo: https://hatchet-dev.github.io/hatchet-charts
    #version: 0.8.0
    version: 0.9.2
    releaseName: hatchet
    namespace: ai-system
    valuesFile: helm-values_hatchet.yaml
    includeCRDs: true

  - name: postgresql
    repo: oci://registry-1.docker.io/bitnamicharts
    #helm inspect chart oci://registry-1.docker.io/bitnamicharts/postgresql
    #skopeo list-tags docker://registry-1.docker.io/bitnamicharts/postgresql
    #version: 16.6.3
    version: 16.6.3
    releaseName: postgresql
    valuesFile: helm-values_postgresql.yaml
    includeCRDs: true
    # the Same Namespace
    namespace: ai-system

patches:
# Applied to every Service (no name filter): IP family settings.
- path: patches/service.yaml
  target:
    kind: Service

# Storage class, credentials source and probes of the RabbitMQ StatefulSet.
- path: patches/hatchet-rabbitmq-sts.yaml
  target:
    kind: StatefulSet
    name: hatchet-rabbitmq

# Remove secrets generated by Helm chart
# NOTE(review): patches/rm-secret-hatchet-postgres.yaml exists but is not listed
# here - confirm whether that is intentional.
- path: patches/rm-secret-hatchet-rabbitmq-config.yaml
  target:
    kind: Secret
    name: hatchet-rabbitmq-config
- path: patches/rm-secret-hatchet-rabbitmq.yaml
  target:
    kind: Secret
    name: hatchet-rabbitmq
- path: patches/rm-secret-hatchet-shared-config.yaml
  target:
    kind: Secret
    name: hatchet-shared-config


================================================
FILE: deployment/k8s/kustomizations/patches/hatchet-rabbitmq-sts.yaml
================================================
# Patch for the hatchet-rabbitmq StatefulSet: sets the volume claim template
# (8Gi, storage class csi-sc), sources RABBITMQ_USERNAME from the hatchet-rabbitmq
# Secret, and defines curl-based liveness/readiness probes.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hatchet-rabbitmq
spec:
  volumeClaimTemplates:
    - kind: PersistentVolumeClaim
      apiVersion: v1
      metadata:
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 8Gi
        storageClassName: csi-sc
  template:
    spec:
      containers:
      - env:
        # `value: ""` is set together with `valueFrom`; presumably this blanks a
        # literal value coming from the base manifest so that only the Secret
        # reference remains after the merge - verify against the chart output.
        - name: RABBITMQ_USERNAME
          value: ""
          valueFrom:
            secretKeyRef:
              key: rabbitmq-user
              name: hatchet-rabbitmq
        # Container to patch, matched by name.
        name: rabbitmq
        # Both probes call the management API on 127.0.0.1:15672 with basic auth.
        # RABBITMQ_PASSWORD is not defined in this patch; it is assumed to be set
        # by the base manifest - TODO confirm.
        livenessProbe:
          exec:
            command:
            - sh
            - -ec
            - curl -f --user ${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD} 127.0.0.1:15672/api/health/checks/virtual-hosts
        readinessProbe:
          exec:
            command:
            - sh
            - -ec
            - curl -f --user ${RABBITMQ_USERNAME}:${RABBITMQ_PASSWORD} 127.0.0.1:15672/api/health/checks/local-alarms


================================================
FILE: deployment/k8s/kustomizations/patches/rm-secret-hatchet-postgres.yaml
================================================
# Delete directive for the Secret named hatchet-postgres.
apiVersion: v1
kind: Secret
metadata:
  name: hatchet-postgres
$patch: delete


================================================
FILE: deployment/k8s/kustomizations/patches/rm-secret-hatchet-rabbitmq-config.yaml
================================================
# Delete directive for the Secret named hatchet-rabbitmq-config.
apiVersion: v1
kind: Secret
metadata:
  name: hatchet-rabbitmq-config
$patch: delete


================================================
FILE: deployment/k8s/kustomizations/patches/rm-secret-hatchet-rabbitmq.yaml
================================================
# Delete directive for the Secret named hatchet-rabbitmq.
apiVersion: v1
kind: Secret
metadata:
  name: hatchet-rabbitmq
$patch: delete


================================================
FILE: deployment/k8s/kustomizations/patches/rm-secret-hatchet-shared-config.yaml
================================================
# Delete directive for the Secret named hatchet-shared-config.
apiVersion: v1
kind: Secret
metadata:
  name: hatchet-shared-config
$patch: delete


================================================
FILE: deployment/k8s/kustomizations/patches/service.yaml
================================================
# JSON Patch (RFC 6902) applied to Services: pins them to single-stack IPv4.
#
# `add` is used rather than `replace`: `replace` requires the target member to
# exist already, and Services that do not declare ipFamilies / ipFamilyPolicy
# would make the patch fail. `add` on an object member sets the value whether or
# not the member is present.
- op: add
  path: /spec/ipFamilies
  value:
    - IPv4

- op: add
  path: /spec/ipFamilyPolicy
  value:
    SingleStack
#    PreferDualStack


================================================
FILE: deployment/k8s/manifests/examples/externalsecret_hatchet.yaml
================================================
---
# Example ExternalSecret that renders the hatchet-shared-config Secret from
# values fetched through the bitwarden-secretsmanager SecretStore.
# The remoteRef keys below are placeholder IDs.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: hatchet-shared-config
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
spec:
  # Example of forcing a re-sync by annotating an ExternalSecret.
  # NOTE(review): the namespace and resource name in this command belong to a
  # different ExternalSecret - adapt them before use.
  ## kubectl -n kube-system annotate es vsphere-cpi-creds force-sync=$(date +%s) --overwrite
  # presumably "0" disables periodic refresh, so the force-sync above is the way
  # to pick up changes - confirm against the External Secrets documentation.
  refreshInterval: "0"
  secretStoreRef:
    # This name must match the metadata.name in the `SecretStore`
    name: bitwarden-secretsmanager
    kind: SecretStore
    #kind: ClusterSecretStore
  target:
    name: hatchet-shared-config
    # this is how the Kind=Secret will look like
    template:
      engineVersion: v2
      # Keys of the generated Secret. "{{ .NAME }}" placeholders are filled from
      # the `secretKey` entries declared under spec.data below.
      data:

        ADMIN_EMAIL: "{{ .RABBITMQ_ADMIN_EMAIL }}"
        ADMIN_PASSWORD: "{{ .RABBITMQ_ADMIN_PASSWORD }}"
        DATABASE_POSTGRES_DB_NAME: "hatchet"
        DATABASE_POSTGRES_HOST: "hatchet-documentdb"
        DATABASE_POSTGRES_PASSWORD: "{{ .HATCHET_DATABASE_POSTGRES_PASSWORD }}"
        DATABASE_POSTGRES_PORT: "5432"
        DATABASE_POSTGRES_SSL_MODE: "disable"
        DATABASE_POSTGRES_USERNAME: "{{ .HATCHET_DATABASE_POSTGRES_USERNAME }}"
        # NOTE(review): user name and password are inserted into the URL verbatim;
        # values containing URL-reserved characters would presumably need encoding.
        DATABASE_URL: "postgres://{{ .HATCHET_DATABASE_POSTGRES_USERNAME }}:{{ .HATCHET_DATABASE_POSTGRES_PASSWORD }}@hatchet-documentdb:5432/hatchet?sslmode=disable"
        SERVER_AUTH_BASIC_AUTH_ENABLED: "t"
        SERVER_AUTH_COOKIE_DOMAIN: "localhost:8080"
        SERVER_AUTH_COOKIE_INSECURE: "t"
        SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
        SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
        SERVER_GRPC_BROADCAST_ADDRESS: "controllers:7070"
        SERVER_GRPC_INSECURE: "true"
        SERVER_TASKQUEUE_RABBITMQ_URL: "amqp://{{ .RABBITMQ_DEFAULT_USER }}:{{ .RABBITMQ_DEFAULT_PASS }}@hatchet-rabbitmq:5672/"
        SERVER_URL: "http://localhost:8080"


  # Remote secrets to fetch; each `secretKey` is the name used in the template above.
  data:
  - secretKey: RABBITMQ_DEFAULT_PASS
    remoteRef:
      key: "6203f8e5-d273-0000-0000-aaa000000000"
  - secretKey: RABBITMQ_DEFAULT_USER
    remoteRef:
      key: "330e6465-4568-0000-0000-aaa000000000"
  - secretKey: HATCHET_DATABASE_POSTGRES_USERNAME
    remoteRef:
      key: "261e8389-852e-0000-0000-aaa000000000"
  - secretKey: HATCHET_DATABASE_POSTGRES_PASSWORD
    remoteRef:
      key: "5eb84a48-e16b-0000-0000-aaa000000000"
  - secretKey: RABBITMQ_ADMIN_EMAIL
    remoteRef:
      key: "3da5e88c-1640-0000-0000-aaa000000000"
  - secretKey: RABBITMQ_ADMIN_PASSWORD
    remoteRef:
      key: "98b55ce2-fce8-0000-0000-aaa000000000"
---
# Example ExternalSecret that renders the hatchet-rabbitmq-config Secret: a
# rabbitmq.conf whose default_user is filled in from the secret store.
# The remoteRef key is a placeholder ID.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: hatchet-rabbitmq-config
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
spec:
  # Force a re-sync by annotating the ExternalSecret, e.g.:
  ## kubectl -n <namespace> annotate es hatchet-rabbitmq-config force-sync=$(date +%s) --overwrite
  refreshInterval: "0"
  secretStoreRef:
    # This name must match the metadata.name in the `SecretStore`
    name: bitwarden-secretsmanager
    kind: SecretStore
    #kind: ClusterSecretStore
  target:
    name: hatchet-rabbitmq-config
    # this is how the Kind=Secret will look like
    template:
      engineVersion: v2
      data:
        rabbitmq.conf: |
          ## Username and password
          default_user = {{ .RABBITMQ_DEFAULT_USER }}
          ## Clustering
          ##
          cluster_name = hatchet-rabbitmq
          cluster_formation.peer_discovery_backend  = rabbit_peer_discovery_k8s
          cluster_formation.k8s.host = kubernetes.default
          cluster_formation.k8s.address_type = hostname
          cluster_formation.k8s.service_name = hatchet-rabbitmq-headless
          cluster_formation.k8s.hostname_suffix = .hatchet-rabbitmq-headless.ai-system.svc.cluster.local
          cluster_formation.node_cleanup.interval = 10
          cluster_formation.node_cleanup.only_log_warning = true
          cluster_partition_handling = autoheal

          # queue master locator
          queue_master_locator = min-masters
          # enable loopback user
          loopback_users.hatchet = false
          #default_vhost = ai-system-vhost
          #disk_free_limit.absolute = 50MB

  data:
  # Same placeholder ID that the hatchet-shared-config ExternalSecret uses for
  # RABBITMQ_DEFAULT_USER, so both examples reference one remote secret.
  - secretKey: RABBITMQ_DEFAULT_USER
    remoteRef:
      key: "330e6465-4568-0000-0000-aaa000000000"
---
# ExternalSecret that builds the `hatchet-rabbitmq` Secret (Erlang cookie,
# password and user name) from three remote secrets in the referenced SecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: hatchet-rabbitmq
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
spec:
  ## Example command for forcing a re-sync (written for another resource; adjust
  ## the namespace and name before using it here):
  ## kubectl -n kube-system annotate es vsphere-cpi-creds force-sync=$(date +%s) --overwrite
  # NOTE(review): refreshInterval "0" presumably disables periodic refresh, which
  # is why a manual force-sync command is noted above — confirm against the
  # External Secrets documentation.
  refreshInterval: "0"
  secretStoreRef:
    # This name must match the metadata.name in the `SecretStore`
    name: bitwarden-secretsmanager
    kind: SecretStore
    #kind: ClusterSecretStore
  target:
    name: hatchet-rabbitmq
    # This template defines what the generated Kind=Secret will look like
    template:
      engineVersion: v2
      data:
        # Each value is filled from the `secretKey` of the same name in `spec.data` below.
        rabbitmq-erlang-cookie: "{{ .rabbitmq_erlang_cookie }}"
        rabbitmq-password: "{{ .RABBITMQ_DEFAULT_PASS }}"
        rabbitmq-user: "{{ .RABBITMQ_DEFAULT_USER }}"

  # Remote secrets to fetch; each `key` is the ID of a secret in the store
  # (the IDs shown are example placeholders).
  data:
  - secretKey: rabbitmq_erlang_cookie
    remoteRef:
      key: "2aae42a4-8813-0000-0000-aaa000000000"
  - secretKey: RABBITMQ_DEFAULT_PASS
    remoteRef:
      key: "6203f8e5-d273-0000-0000-aaa000000000"
  - secretKey: RABBITMQ_DEFAULT_USER
    remoteRef:
      key: "330e6465-4568-0000-0000-aaa000000000"


================================================
FILE: deployment/k8s/manifests/examples/externalsecret_r2r.yaml
================================================
# ExternalSecret that builds the `r2r-secrets` Secret: database credentials,
# LLM provider API keys and the R2R secret key.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: r2r-secrets
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
spec:
  ## Example command for forcing a re-sync (written for another resource; adjust
  ## the namespace and name before using it here):
  ## kubectl -n kube-system annotate es vsphere-cpi-creds force-sync=$(date +%s) --overwrite
  refreshInterval: "0"
  secretStoreRef:
    # This name must match the metadata.name in the `SecretStore`
    name: bitwarden-secretsmanager
    kind: SecretStore
    #kind: ClusterSecretStore
  target:
    name: r2r-secrets
    # This template defines what the generated Kind=Secret will look like
    template:
      engineVersion: v2
      data:

        # Postgres credentials, taken from the remote secrets listed in `spec.data`.
        R2R_POSTGRES_USER: "{{ .R2R_POSTGRES_USER }}"
        R2R_POSTGRES_PASSWORD: "{{ .R2R_POSTGRES_PASSWORD }}"

        # Note: LITELLM_PROXY_API_KEY is rendered from the same remote value as OPENAI_API_KEY.
        OPENAI_API_KEY: "{{ .OPENAI_API_KEY }}"
        LITELLM_PROXY_API_KEY: "{{ .OPENAI_API_KEY }}"
        R2R_SECRET_KEY: "{{ .R2R_SECRET_KEY }}"

        # Keys for providers not configured in this example are rendered as empty strings.
        ANTHROPIC_API_KEY: ""
        AZURE_FOUNDRY_API_KEY: ""
        AZURE_API_KEY: ""
        GOOGLE_APPLICATION_CREDENTIALS: ""
        GEMINI_API_KEY: ""
        AWS_ACCESS_KEY_ID: ""
        AWS_SECRET_ACCESS_KEY: ""
        GROQ_API_KEY: ""
        COHERE_API_KEY: ""
        ANYSCALE_API_KEY: ""
        LM_STUDIO_API_KEY: ""
        # HUGGINGFACE_API_KEY is rendered from the remote secret fetched as HF_TEI_LOCAL_API_KEY.
        HUGGINGFACE_API_KEY: "{{ .HF_TEI_LOCAL_API_KEY }}"
        UNSTRUCTURED_API_KEY: ""
        SERPER_API_KEY: ""
        SENDGRID_API_KEY: ""

        # OAuth client settings, left empty in this example.
        GOOGLE_CLIENT_ID: ""
        GOOGLE_CLIENT_SECRET: ""
        GITHUB_CLIENT_ID: ""
        GITHUB_CLIENT_SECRET: ""

  # Remote secrets to fetch; each `key` is the ID of a secret in the store
  # (the IDs shown are example placeholders).
  data:
  - secretKey: R2R_POSTGRES_USER
    remoteRef:
      key: "2ef5f595-067d-0000-0000-aaa000000000"
  - secretKey: R2R_POSTGRES_PASSWORD
    remoteRef:
      key: "5ddbf1a2-4db4-0000-0000-aaa000000000"
  - secretKey: OPENAI_API_KEY
    remoteRef:
      key: "4d6dd102-8ba6-0000-0000-aaa000000000"
  - secretKey: HF_TEI_LOCAL_API_KEY
    remoteRef:
      key: "d1f9c4a9-2ae2-0000-0000-aaa000000000"
  - secretKey: R2R_SECRET_KEY
    remoteRef:
      key: "2d845d61-d204-0000-0000-aaa000000000"

---
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: r2r-files
  annotations:
    argocd.argoproj.io/sync-wave: "-2"
spec:
  ## kubectl -n kube-system annotate es vsphere-cpi-creds force-sync=$(date +%s) --overwrite
  refreshInterval: "0"
  secretStoreRef:
    # This name must match the metadata.name in the `SecretStore`
    name: bitwarden-secretsmanager
    kind: SecretStore
    #kind: ClusterSecretStore
  target:
    name: r2r-files
    # this is how the Kind=Secret will look like
    template:
      engineVersion: v2
      data:
        r2r.toml: |
          [app]
          # app settings are global available like `r2r_config.agent.app`
          # project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
          default_max_documents_per_user = 1_000
          default_max_chunks_per_user = 1_000_000
          default_max_collections_per_user = 100

          # Set the default max upload size to 200 GB for local testing
          default_max_upload_size = 214748364800

          # LLM used for internal operations, like deriving conversation names
          fast_llm = "openai/openai-cloudflareaig/gpt-4o-mini"

          # LLM used for user-facing output, like RAG replies
          quality_llm = "openai/openai-cloudflareaig/gpt-4o"

          # LLM used for ingesting visual inputs
          vlm = "openai/openai-cloudflareaig/gpt-4o"

          # LLM used for transcription
          audio_lm = "openai/openai-cloudflareaig/whisper-1"


          [agent]
          #system_instruction_name = "rag_agent"                # The "system" message or prompt name
          agent_static_prompt = "static_rag_agent"
          agent_dynamic_prompt = "dynamic_rag_agent"
          # tools = ["local_search", "content", "web_search"]   # uncomment to enable web search
          tools = ["local_search", "content"]                   # Tools accessible to the agent

            [agent.generation_config]
            #model = "openai/openai-cloudflareaig/gpt-4o"
            model = "openai/openai-cloudflareaig/gpt-4o-mini"
            #temperature = 0.7
            #top_p = 0.9
            #max_tokens_to_sample = 1_024
            #stream = false
            #functions = []
            #tools = []
            #api_base = ""
            #add_generation_kwargs = {}


          [auth]
          provider = "r2r"                          # Supported values: "r2r", "supabase"
          access_token_lifetime_in_minutes = 60000  # Lifetime of access token in minutes
          refresh_token_lifetime_in_days = 7        # Lifetime of refresh token in days
          require_authentication = false            # If true, all requests must provide valid auth
          require_email_verification = false        # If true, newly created users must verify email
          default_admin_email = "{{ .default_admin_email }}"
          default_admin_password = "{{ .default_admin_password }}"

            #[auth.extra_fields]
            #supabase_url = "https://your-supabase-url.com"   # Required if provider="supabase"
            #supabase_key = "{{ .supabase_key }}"             # Required if provider="supabase"


          [completion]
          provider = "r2r"                          # litellm
          concurrent_request_limit = 64             # Global concurrency limit for completion requests

            [completion.generation_config]
            #model = "openai/openai-cloudflareaig/gpt-4o"
            model = "openai/openai-cloudflareaig/gpt-4o-mini"
            temperature = 0.1
            top_p = 1
            max_tokens_to_sample = 1_024            # 4_096
            stream = false
            #functions = []                         # If provider supports function calling
            #tools = []                             # If provider supports tool usage
            #api_base = ""                          # Custom base URL if needed
            add_generation_kwargs = { }             # Catch-all for extra generation params (e.g., "stop" tokens, etc.)
            #response_format.type = "json_object"   # Ebable strict structured JSON-mode response format: "json_object" or leave blank

          [crypto]
          provider = "bcrypt"                       # "bcrypt" or "nacl"
                                                    # "bcrypt": uses BcryptCryptoProvider (crypto/bcrypt.py)
                                                    # "nacl":   uses NaClCryptoProvider   (crypto/nacl.py)

          #secret_key = ""                          # Master key for JWT token signing
                                                    # Default fallback from env: R2R_SECRET_KEY
                                                    # If not set, code may use a built-in default (NOT RECOMMENDED for production)


          [database]
          provider = "postgres"                     # "postgres", "mysql", "sqlite", or custom
          default_collection_name = "Default"
          default_collection_description = "Your default collection."
          enable_fts = true                         # whether or not to enable full-text search, e.g `hybrid search`
          # collection_summary_system_prompt = 'default_system'
          # collection_summary_task_prompt = 'default_collection_summary'

          # KG settings
          batch_size = 256                          # Some ingestion/DB ops batch size (especially for large data)

            [database.graph_creation_settings]      # Configuration for the model used in knowledge graph creation.
              clustering_mode = "local"             # "remote" or "local"
              graph_entity_description_prompt = "graph_entity_description"
              graph_extraction_prompt = "graph_extraction"
              entity_types = []                     # if empty, all entities are extracted
              relation_types = []                   # if empty, all relations are extracted
              automatic_deduplication = true        # enable automatic deduplication of entities
              fragment_merge_count = 4              # number of fragments to merge into a single extraction
              max_knowledge_relationships = 100
              max_knowledge_triples = 100           # max number of triples to extract for each document chunk
              max_description_input_length = 49_152
              #generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }
              generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" } # and other params, model used for relationshipt extraction
              #concurrent_request_limit = 2

            [database.graph_entity_deduplication_settings]
              graph_entity_deduplication_type = "by_name"  # "by_name", "by_id"
              graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
              max_description_input_length = 49_152   # increase if you want more comprehensive descriptions
              #generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }
              generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" } # and other params, model used for deduplication
              #concurrent_request_limit = 2

            [database.graph_enrichment_settings]
              graph_communities_prompt = "graph_communities"
              max_summary_input_length = 49_152
              #generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }
              generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" } # and other params, model used for node description and graph clustering
              leiden_params = {}                                                        # Parameters for the Leiden algorithm.
              #concurrent_request_limit = 2

            [database.graph_search_settings]        #What is this used for? Should be configuration for the model used in knowledge graph search operations.
              enabled = true
              #generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }
              generation_config = { model = "openai/ollama-openai/sparse-llama3.1:8b-2of4-bf16" }

            [database.limits]
              # Default fallback limits if no route or user-level overrides are found
              global_per_min = 30_000
              monthly_limit = 100_000

            [database.route_limits]
              # Set the `v3/retrieval/search` route to have a maximum of 5 requests per minute
              "/v3/retrieval/search" = { route_per_min = 120, monthly_limit = 1_000_000 }
              "/v3/retrieval/rag" = { route_per_min = 30 }

            [database.user_limits."47e53676-b478-5b3f-a409-234ca2164de5"]
            global_per_min = 2
            route_per_min = 1


          [embedding]
          provider = "litellm"
          # NOTE: `concurrent_request_limit` used to be defined twice in this table
          # (32 here and 64 further down). TOML does not allow a key to be defined
          # more than once, so the first definition is commented out and the later
          # value (64) is the single remaining definition.
          #concurrent_request_limit = 32          # Embedding concurrency limit

          # For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`

          # RECOMMENDED - For advanced applications,
          # use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
          #base_model = "openai/openai-cloudflareaig/text-embedding-3-small"
          #base_dimension = 512
          #base_model = "openai/infinity/bge-en-icl"
          base_model = "openai/nebius/bge-en-icl"
          base_dimension = 4_096
          #api_base = "https://litellm.mywebsite.com/v1"            # Optional, can be set via LITELLM_PROXY_API_BASE
          #api_key = "{{ .LITELLM_PROXY_API_KEY }}"

          rerank_model = "huggingface/BAAI/bge-reranker-v2-m3"    # Optional re-rank model
          #rerank_url = "https://hf-tei.mywebsite.com"    # Optional URL for re-rank, can be set via HUGGINGFACE_API_BASE

          batch_size = 32                                         # Number of texts processed per request
          add_title_as_prefix = false                             # If true, prepend the doc title to text
          concurrent_request_limit = 64                           # Embedding concurrency limit
          quantization_settings = { quantization_type = "FP32" }

            [embedding.chunk_enrichment_settings]
            generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }


          [completion_embedding]
          # Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
          provider = "litellm"
          base_model = "openai/nebius/bge-en-icl"
          base_dimension = 512
          batch_size = 128
          add_title_as_prefix = false
          concurrent_request_limit = 256


          [file]
          provider = "postgres"                   # "postgres", "local", "s3", etc. if implemented


          [ingestion]
          # NOTE: `provider` used to be defined twice in this table ("r2r" and then
          # "unstructured_local"). TOML does not allow a key to be defined more than
          # once, so only "unstructured_local" is kept; it is the provider that
          # supports the `by_title` chunking strategy configured below.
          #provider = "r2r"
          provider = "unstructured_local"         # "r2r", "unstructured_local", "unstructured_api"
                                                  # r2r chunking_strategy: recursive only
                                                  # unstructured_local chunking_strategy: by_title or character
          strategy = "auto"                       # Could be "auto", "by_title", "recursive", etc.
          chunking_strategy = "by_title"          # "recursive", "by_title", "character", etc. depending on the provider
          chunk_size = 1_024
          chunk_overlap = 512
          excluded_parsers = ["mp4"]              # Example of skipping certain file types

          automatic_extraction = true             # enable automatic extraction of entities and relations
          new_after_n_chars = 2_048
          max_characters = 4_096
          combine_under_n_chars = 1_024
          overlap = 1_024
          ingestion_mode = "hi-res"               # "hi-res", "fast", or "custom" (see below)

                                                  #- `hi-res`: Thorough ingestion with full summaries and enrichment.
                                                  #- `fast`: Quick ingestion with minimal enrichment and no summaries.
                                                  #- `custom`: Full control via `ingestion_config`.
                                                  #If `filters` or `limit` (in `ingestion_config`) are provided alongside `hi-res` or `fast`,
                                                  #they will override the default settings for that mode.
          # Ingestion-time document summary parameters
          skip_document_summary = false
          # document_summary_system_prompt = 'default_system'
          # document_summary_task_prompt = 'default_summary'
          # chunks_for_document_summary = 128
          document_summary_model = "openai/openai-cloudflareaig/gpt-4o-mini"  # Summaries for each doc chunk

          audio_transcription_model = "openai/whisper-1"  # If ingesting audio
          #vision_img_model = "openai/openai-cloudflareaig/gpt-4o"
          vision_img_model = "openai/ollama-openai/llama3.2-vision:90b-instruct-q4_k_m" # If vision-based models supported
          #vision_pdf_model = "openai/openai-cloudflareaig/gpt-4o"
          vision_pdf_model = "openai/ollama-openai/llama3.2-vision:90b-instruct-q4_k_m"

            [ingestion.chunk_enrichment_settings]
              chunk_enrichment_prompt = "chunk_enrichment"
              enable_chunk_enrichment = false   # disabled by default
              n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment
              strategies = ["semantic", "neighborhood"]
              forward_chunks = 3
              backward_chunks = 3
              semantic_neighbors = 10
              semantic_similarity_threshold = 0.7
              generation_config = { model = "openai/openai-cloudflareaig/gpt-4o-mini" }

            [ingestion.extra_parsers]
              pdf = "zerox"                     # "zerox" parser override for PDFs (extended functionality)


          [logging]
          level = "DEBUG"   # One of: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
          provider = "r2r"
          log_table = "logs"
          log_info_table = "log_info"
          # file = "app.log" # Log output file path


          [orchestration]
          provider = "hatchet"                 # "hatchet" or "simple"
          kg_creation_concurrency_limit = 32  # used if "hatchet" orchestrator
          ingestion_concurrency_limit = 16    # used if "hatchet" orchestrator
          kg_concurrency_limit = 8            # used if "hatchet" orchestrator


          [prompt]
          provider = "r2r"


          [email]
          provider = "console_mock"         # "smtp", "sendgrid", or "console_mock"
                                            #
                                            # - "smtp": uses AsyncSMTPEmailProvider (email/smtp.py)
                                            # - "sendgrid": uses SendGridEmailProvider (email/sendgrid.py)
                                            # - "console_mock": uses ConsoleMockEmailProvider (email/console_mock.py)

            # Console Mock settings (provider="console_mock")
            [email.console_mock]
            logs = true  # If true, logs emails to console for testing

  data:
  - secretKey: default_admin_email
    remoteRef:
      key: "1330136d-c49b-0000-0000-aaa000000000"
  - secretKey: default_admin_password
    remoteRef:
      key: "059ba37f-a172-0000-0000-aaa000000000"
  - secretKey: supabase_key
    remoteRef:
      key: "84c50cae-56a8-0000-0000-aaa000000000"
  - secretKey: R2R_SECRET_KEY
    remoteRef:
      key: "2d845d61-d204-0000-0000-aaa000000000"
  - secretKey: LITELLM_PROXY_API_KEY
    remoteRef:
      key: "4d6dd102-8ba6-0000-0000-aaa000000000"
---


================================================
FILE: deployment/k8s/manifests/examples/ingress-r2r.yaml
================================================
# Dependency: https://external-dns.io
# Used to add a DNS record for the r2r.mywebsite.com host.
# Note: without authentication, anyone can access your app, see your data and modify your settings!
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: r2r.mywebsite.com-tls
  annotations:
    ### Dependency: external-dns
    external-dns.alpha.kubernetes.io/filter: 'include'
    external-dns.alpha.kubernetes.io/cloudflare-proxied: 'true'
    external-dns.alpha.kubernetes.io/provider-cloudflare: 'true'
    external-dns.alpha.kubernetes.io/target: so-ingress.mywebsite.com
    #external-dns.alpha.kubernetes.io/target: so-ingress.mywebsite.com

    ### Dependency: nginx-ingress-controller
    nginx.ingress.kubernetes.io/disable-lua: 'true'
    nginx.ingress.kubernetes.io/enable-lua: 'false'
    nginx.ingress.kubernetes.io/enable-vts-status: 'false'
    nginx.ingress.kubernetes.io/enable-modsecurity: 'false'
    nginx.ingress.kubernetes.io/modsecurity-snippet: |
      SecRuleEngine Off
    nginx.ingress.kubernetes.io/enable-owasp-modsecurity-crs: 'false'
    nginx.ingress.kubernetes.io/proxy-connect-timeout: '360'
    nginx.ingress.kubernetes.io/proxy-read-timeout: '360'
    nginx.ingress.kubernetes.io/proxy-send-timeout: '360'

spec:
  # You may use another ingressClassName instead, such as AWS "alb". If you do not use
  # the nginx ingress, remember to comment out the unsupported annotations above.
  # "nginx" or "alb"
  ingressClassName: nginx
  rules:
    - host: r2r.mywebsite.com
      http:
        paths:
          # Everything under "/" is routed to the dashboard service on port 3000.
          - path: /
            pathType: Prefix
            backend:
              service:
              # Change the service name to match your own service
                name: r2r-dashboard
                port:
                  number: 3000
          # Requests under "/hatchet" are routed to the Hatchet dashboard service on port 80.
          - path: /hatchet
            pathType: Prefix
            backend:
              service:
              # Change the service name to match your own service
                name: hatchet-dashboard
                port:
                  number: 80
### Comment out the TLS section if you are not going to use HTTPS
  tls:
    - hosts:
      - r2r.mywebsite.com
      secretName: r2r.mywebsite.com-tls


================================================
FILE: deployment/k8s/manifests/examples/secrets_hatchet.yaml
================================================
---
# Plain Secret manifest `hatchet-shared-config` in namespace `ai-system`.
# NOTE(review): every value is shown as `++++++++`, presumably a redacted
# placeholder that must be replaced with the real encoded value before
# applying — confirm.
apiVersion: v1
data:
  ADMIN_EMAIL: ++++++++
  ADMIN_PASSWORD: ++++++++
  DATABASE_POSTGRES_DB_NAME: ++++++++
  DATABASE_POSTGRES_HOST: ++++++++
  DATABASE_POSTGRES_PASSWORD: ++++++++
  DATABASE_POSTGRES_PORT: ++++++++
  DATABASE_POSTGRES_SSL_MODE: ++++++++
  DATABASE_POSTGRES_USERNAME: ++++++++
  DATABASE_URL: ++++++++
  SERVER_AUTH_BASIC_AUTH_ENABLED: ++++++++
  SERVER_AUTH_COOKIE_DOMAIN: ++++++++
  SERVER_AUTH_COOKIE_INSECURE: ++++++++
  SERVER_AUTH_SET_EMAIL_VERIFIED: ++++++++
  SERVER_GRPC_BIND_ADDRESS: ++++++++
  SERVER_GRPC_BROADCAST_ADDRESS: ++++++++
  SERVER_GRPC_INSECURE: ++++++++
  SERVER_TASKQUEUE_RABBITMQ_URL: ++++++++
  SERVER_URL: ++++++++
kind: Secret
metadata:
  name: hatchet-shared-config
  namespace: ai-system
type: Opaque

---
# Secret `hatchet-rabbitmq-config`: holds the single `rabbitmq.conf` file.
# It has the same name and key as the target of the `hatchet-rabbitmq-config`
# ExternalSecret example.
apiVersion: v1
data:
  rabbitmq.conf: ++++++++
kind: Secret
metadata:
  name: hatchet-rabbitmq-config
  namespace: ai-system
type: Opaque
---
# Secret `hatchet-rabbitmq`: Erlang cookie, password and user name.
# It has the same name and keys as the target of the `hatchet-rabbitmq`
# ExternalSecret example.
apiVersion: v1
data:
  rabbitmq-erlang-cookie: ++++++++
  rabbitmq-password: ++++++++
  rabbitmq-user: ++++++++
kind: Secret
metadata:
  name: hatchet-rabbitmq
  namespace: ai-system
type: Opaque


================================================
FILE: deployment/k8s/manifests/examples/secrets_r2r.yaml
================================================
---
# Plain Secret manifest `r2r-secrets` in namespace `ai-system`; it has the same
# name and keys as the target of the `r2r-secrets` ExternalSecret example.
# NOTE(review): every value is shown as `++++++++`, presumably a redacted
# placeholder that must be replaced with the real encoded value before
# applying — confirm.
apiVersion: v1
data:
  ANTHROPIC_API_KEY: ++++++++
  ANYSCALE_API_KEY: ++++++++
  AWS_ACCESS_KEY_ID: ++++++++
  AWS_SECRET_ACCESS_KEY: ++++++++
  AZURE_API_KEY: ++++++++
  AZURE_FOUNDRY_API_KEY: ++++++++
  COHERE_API_KEY: ++++++++
  GEMINI_API_KEY: ++++++++
  GITHUB_CLIENT_ID: ++++++++
  GITHUB_CLIENT_SECRET: ++++++++
  GOOGLE_APPLICATION_CREDENTIALS: ++++++++
  GOOGLE_CLIENT_ID: ++++++++
  GOOGLE_CLIENT_SECRET: ++++++++
  GROQ_API_KEY: ++++++++
  HUGGINGFACE_API_KEY: ++++++++
  LITELLM_PROXY_API_KEY: ++++++++
  LM_STUDIO_API_KEY: ++++++++
  OPENAI_API_KEY: ++++++++
  R2R_POSTGRES_PASSWORD: ++++++++
  R2R_POSTGRES_USER: ++++++++
  R2R_SECRET_KEY: ++++++++
  SENDGRID_API_KEY: ++++++++
  SERPER_API_KEY: ++++++++
  UNSTRUCTURED_API_KEY: ++++++++
kind: Secret
metadata:
  name: r2r-secrets
  namespace: ai-system
type: Opaque
---
# Secret `r2r-files`: holds the single `r2r.toml` configuration file, matching
# the target of the `r2r-files` ExternalSecret example.
apiVersion: v1
data:
  r2r.toml: ++++++++
kind: Secret
metadata:
  name: r2r-files
  namespace: ai-system
type: Opaque


================================================
FILE: docker/compose.full.swarm.yaml
================================================
# Named volumes used by the services below. Each volume's actual name can be
# overridden through the corresponding VOLUME_* environment variable; otherwise
# the default after `:-` is used.
volumes:
  hatchet_certs:
    name: ${VOLUME_HATCHET_CERTS:-hatchet_certs}
  hatchet_config:
    name: ${VOLUME_HATCHET_CONFIG:-hatchet_config}
  hatchet_api_key:
    name: ${VOLUME_HATCHET_API_KEY:-hatchet_api_key}
  postgres_data:
    name: ${VOLUME_POSTGRES_DATA:-postgres_data}
  hatchet_rabbitmq_data:
    name: ${VOLUME_HATCHET_RABBITMQ_DATA:-hatchet_rabbitmq_data}
  hatchet_rabbitmq_conf:
    name: ${VOLUME_HATCHET_RABBITMQ_CONF:-hatchet_rabbitmq_conf}
  hatchet_postgres_data:
    name: ${VOLUME_HATCHET_POSTGRES_DATA:-hatchet_postgres_data}

services:
  # Postgres (pgvector image) for R2R. User, password, port and max connections
  # come from the R2R_POSTGRES_* variables, with the defaults shown after `:-`.
  postgres:
    image: pgvector/pgvector:pg16
    environment:
      - POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres}
      - POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
      - POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
      - POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
      # PGPORT uses the same variable as the published port below.
      - PGPORT=${R2R_POSTGRES_PORT:-5432}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      # The same port number is used on the host and inside the container.
      - "${R2R_POSTGRES_PORT:-5432}:${R2R_POSTGRES_PORT:-5432}"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${R2R_POSTGRES_USER:-postgres}"]
      interval: 10s
      timeout: 5s
      retries: 5
    # Start the server with max_connections taken from R2R_POSTGRES_MAX_CONNECTIONS.
    command: >
      postgres
      -c max_connections=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Separate Postgres instance for Hatchet. No ports are published; other
  # services in this file reach it as `hatchet-postgres:5432`.
  hatchet-postgres:
    image: postgres:latest
    environment:
      POSTGRES_DB: ${HATCHET_POSTGRES_DBNAME:-hatchet}
      POSTGRES_USER: ${HATCHET_POSTGRES_USER:-hatchet_user}
      POSTGRES_PASSWORD: ${HATCHET_POSTGRES_PASSWORD:-hatchet_password}
    volumes:
      - hatchet_postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Checks readiness for the configured user and database.
      test: ["CMD-SHELL", "pg_isready -U ${HATCHET_POSTGRES_USER:-hatchet_user} -d ${HATCHET_POSTGRES_DBNAME:-hatchet}"]
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # RabbitMQ (management image) used as Hatchet's task queue; see
  # SERVER_TASKQUEUE_RABBITMQ_URL in `hatchet-setup-config`.
  hatchet-rabbitmq:
    image: "rabbitmq:3-management"
    hostname: "hatchet-rabbitmq"
    ports:
      # Container ports 5672 and 15672 are published on the host ports set by
      # R2R_RABBITMQ_PORT and R2R_RABBITMQ_MGMT_PORT (defaults 5673 and 15673).
      - "${R2R_RABBITMQ_PORT:-5673}:5672"
      - "${R2R_RABBITMQ_MGMT_PORT:-15673}:15672"
    environment:
      # Hard-coded credentials; they must match the `user:password` pair in the
      # SERVER_TASKQUEUE_RABBITMQ_URL of `hatchet-setup-config`.
      RABBITMQ_DEFAULT_USER: "user"
      RABBITMQ_DEFAULT_PASS: "password"
    volumes:
      - hatchet_rabbitmq_data:/var/lib/rabbitmq
      # NOTE(review): this mounts a named volume at a path that looks like a
      # file (rabbitmq.conf) — verify that this is the intended behavior.
      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
    healthcheck:
      test: ["CMD", "rabbitmqctl", "status"]
      interval: 10s
      timeout: 10s
      retries: 5
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # One-shot helper: waits until `hatchet-postgres` accepts connections, then
  # creates the Hatchet database if it is not already listed by `psql -lqt`.
  hatchet-create-db:
    image: postgres:latest
    # The script polls pg_isready once per second; `set -e` makes it exit on the
    # first failing command.
    command: >
      sh -c "
        set -e
        echo 'Waiting for PostgreSQL to be ready...'
        while ! pg_isready -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user}; do
          sleep 1
        done
        echo 'PostgreSQL is ready, checking if database exists...'
        if ! PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} psql -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -lqt | grep -qw ${HATCHET_POSTGRES_DBNAME:-hatchet}; then
          echo 'Database does not exist, creating it...'
          PGPASSWORD=${HATCHET_POSTGRES_PASSWORD:-hatchet_password} createdb -h hatchet-postgres -p 5432 -U ${HATCHET_POSTGRES_USER:-hatchet_user} -w ${HATCHET_POSTGRES_DBNAME:-hatchet}
        else
          echo 'Database already exists, skipping creation.'
        fi
      "
    environment:
      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
    deploy:
      replicas: 1
      restart_policy:
        # Restarted only when the script exits with a failure.
        condition: on-failure

  # Runs the Hatchet migration image against the Hatchet database given by DATABASE_URL.
  hatchet-migration:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
    environment:
      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
    # NOTE(review): this file is named for swarm use; check whether `depends_on`
    # ordering is honored by your deployment method, or whether the service
    # relies on the on-failure restart policy below until the database exists.
    depends_on:
      - hatchet-create-db
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Generates the Hatchet configuration into the shared `hatchet_config` volume
  # using `hatchet-admin quickstart` (certificate generation is skipped and
  # existing config is not overwritten).
  hatchet-setup-config:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
    command: /hatchet/hatchet-admin quickstart --skip certs --generated-config-dir /hatchet/config --overwrite=false
    environment:
      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"

      # gRPC message size limits, defaulting to 134217728 bytes (128 MiB).
      HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}"
      HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH: "${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}"

      DATABASE_POSTGRES_PORT: "5432"
      DATABASE_POSTGRES_HOST: hatchet-postgres
      DATABASE_POSTGRES_USERNAME: "${HATCHET_POSTGRES_USER:-hatchet_user}"
      DATABASE_POSTGRES_PASSWORD: "${HATCHET_POSTGRES_PASSWORD:-hatchet_password}"
      # The database name is provided under the same DATABASE_POSTGRES_* prefix as
      # the settings above (matching the key used in the Kubernetes
      # `hatchet-shared-config` secret). The HATCHET_-prefixed variable is kept
      # for backward compatibility.
      DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}"
      HATCHET_DATABASE_POSTGRES_DB_NAME: "${HATCHET_POSTGRES_DBNAME:-hatchet}"

      # Credentials must match RABBITMQ_DEFAULT_USER / RABBITMQ_DEFAULT_PASS of `hatchet-rabbitmq`.
      SERVER_TASKQUEUE_RABBITMQ_URL: amqp://user:password@hatchet-rabbitmq:5672/
      SERVER_AUTH_COOKIE_DOMAIN: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
      SERVER_URL: "http://host.docker.internal:${R2R_HATCHET_DASHBOARD_PORT:-7274}"
      SERVER_AUTH_COOKIE_INSECURE: "t"
      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
      SERVER_GRPC_INSECURE: "t"
      # Address advertised for the engine's gRPC endpoint; matches the port
      # published by `hatchet-engine`.
      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
      SERVER_GRPC_MAX_MSG_SIZE: 134217728
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
    depends_on:
      - hatchet-migration
      - hatchet-rabbitmq
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Hatchet engine, started with the configuration found in the shared
  # `hatchet_config` volume (mounted at /hatchet/config).
  hatchet-engine:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
    command: /hatchet/hatchet-engine --config /hatchet/config
    depends_on:
      - hatchet-setup-config
    ports:
      # gRPC port 7077 is published on the host port set by R2R_HATCHET_ENGINE_PORT.
      - "${R2R_HATCHET_ENGINE_PORT:-7077}:7077"
    environment:
      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
      SERVER_GRPC_BROADCAST_ADDRESS: "hatchet-engine:7077"
      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
      SERVER_GRPC_PORT: "7077"
      SERVER_GRPC_INSECURE: "t"
      SERVER_GRPC_MAX_MSG_SIZE: 134217728
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
    healthcheck:
      # Liveness is probed over HTTP on port 8733 inside the container.
      test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8733/live"]
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Hatchet dashboard, started through its entrypoint script with the shared
  # configuration directory.
  hatchet-dashboard:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
    command: sh ./entrypoint.sh --config /hatchet/config
    depends_on:
      - hatchet-setup-config
    environment:
      DATABASE_URL: "postgres://${HATCHET_POSTGRES_USER:-hatchet_user}:${HATCHET_POSTGRES_PASSWORD:-hatchet_password}@hatchet-postgres:5432/${HATCHET_POSTGRES_DBNAME:-hatchet}?sslmode=disable"
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
    ports:
      # Container port 80 is published on R2R_HATCHET_DASHBOARD_PORT (default
      # 7274), the same port used in SERVER_URL of `hatchet-setup-config`.
      - "${R2R_HATCHET_DASHBOARD_PORT:-7274}:80"
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Runs scripts/setup-token.sh, which writes the Hatchet API token into the
  # shared hatchet_api_key volume.
  setup-token:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
    command: ["sh", "/scripts/setup-token.sh"]
    depends_on:
      - hatchet-setup-config
    volumes:
      - ./scripts:/scripts
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
      - hatchet_api_key:/hatchet_api_key
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure

  # Unstructured service; image is overridable via UNSTRUCTURED_IMAGE.
  unstructured:
    image: ${UNSTRUCTURED_IMAGE:-ragtoriches/unst-prod}
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7275/health"
      interval: 10s
      timeout: 5s
      retries: 5

  # Graph clustering service; image and published host port are overridable.
  graph_clustering:
    image: ${GRAPH_CLUSTERING_IMAGE:-ragtoriches/cluster-prod}
    ports:
      - "${R2R_GRAPH_CLUSTERING_PORT:-7276}:7276"
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7276/health"
      interval: 10s
      timeout: 5s
      retries: 5

  # R2R API server. Every setting is taken from the deploying shell's
  # environment; each interpolation carries a `:-` default so that unset
  # variables resolve silently instead of producing Compose warnings.
  r2r:
    image: sciphiai/r2r:latest
    ports:
      - "${R2R_PORT:-7272}:${R2R_PORT:-7272}"
    environment:
      - PYTHONUNBUFFERED=1
      - R2R_PORT=${R2R_PORT:-7272}
      - R2R_HOST=${R2R_HOST:-0.0.0.0}

      # R2R
      - R2R_LOG_LEVEL=${R2R_LOG_LEVEL:-INFO}
      - R2R_LOG_CONSOLE_FORMATTER=${R2R_LOG_CONSOLE_FORMATTER:-json}
      - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-}
      - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-}
      - R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default}
      - R2R_SECRET_KEY=${R2R_SECRET_KEY:-}

      # Postgres
      - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres}
      - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres}
      - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
      - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
      - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
      - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
      - R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}

      # OpenAI
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - OPENAI_API_BASE=${OPENAI_API_BASE:-}

      # Azure Foundry
      - AZURE_FOUNDRY_API_ENDPOINT=${AZURE_FOUNDRY_API_ENDPOINT:-}
      - AZURE_FOUNDRY_API_KEY=${AZURE_FOUNDRY_API_KEY:-}

      # XAI / GROK
      - XAI_API_KEY=${XAI_API_KEY:-}

      # Anthropic
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}

      # Azure
      - AZURE_API_KEY=${AZURE_API_KEY:-}
      - AZURE_API_BASE=${AZURE_API_BASE:-}
      - AZURE_API_VERSION=${AZURE_API_VERSION:-}

      # Google Vertex AI
      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
      - VERTEX_PROJECT=${VERTEX_PROJECT:-}
      - VERTEX_LOCATION=${VERTEX_LOCATION:-}

      # Google Gemini
      - GEMINI_API_KEY=${GEMINI_API_KEY:-}

      # Mistral (listed in docker/env/r2r*.env; passed through here as well)
      - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}

      # AWS Bedrock
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
      - AWS_REGION_NAME=${AWS_REGION_NAME:-}

      # Groq
      - GROQ_API_KEY=${GROQ_API_KEY:-}

      # Cohere
      - COHERE_API_KEY=${COHERE_API_KEY:-}

      # Anyscale
      - ANYSCALE_API_KEY=${ANYSCALE_API_KEY:-}

      # Ollama
      - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}

      # LM Studio
      - LM_STUDIO_API_BASE=${LM_STUDIO_API_BASE:-http://host.docker.internal:1234}
      - LM_STUDIO_API_KEY=${LM_STUDIO_API_KEY:-1234}

      # Huggingface
      - HUGGINGFACE_API_BASE=${HUGGINGFACE_API_BASE:-http://host.docker.internal:8080}
      - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY:-}

      # Unstructured
      - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
      - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general}
      - UNSTRUCTURED_SERVICE_URL=${UNSTRUCTURED_SERVICE_URL:-http://unstructured:7275}
      - UNSTRUCTURED_NUM_WORKERS=${UNSTRUCTURED_NUM_WORKERS:-10}

      # Hatchet
      - HATCHET_CLIENT_TLS_STRATEGY=none
      - HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH:-134217728}
      - HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=${HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH:-134217728}

      # Graphologic
      - CLUSTERING_SERVICE_URL=http://graph_clustering:7276

      # OAuth Credentials
      - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID:-}
      - GOOGLE_CLIENT_SECRET=${GOOGLE_CLIENT_SECRET:-}
      - GOOGLE_REDIRECT_URI=${GOOGLE_REDIRECT_URI:-}

      - GITHUB_CLIENT_ID=${GITHUB_CLIENT_ID:-}
      - GITHUB_CLIENT_SECRET=${GITHUB_CLIENT_SECRET:-}
      - GITHUB_REDIRECT_URI=${GITHUB_REDIRECT_URI:-}

      # Email
      - MAILERSEND_API_KEY=${MAILERSEND_API_KEY:-}
      - SENDGRID_API_KEY=${SENDGRID_API_KEY:-}

      # Websearch
      - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-}
      - SERPER_API_KEY=${SERPER_API_KEY:-}
      - TAVILY_API_KEY=${TAVILY_API_KEY:-}

      # Sentry Tracing
      - R2R_SENTRY_DSN=${R2R_SENTRY_DSN:-}
      - R2R_SENTRY_ENVIRONMENT=${R2R_SENTRY_ENVIRONMENT:-}
      - R2R_SENTRY_TRACES_SAMPLE_RATE=${R2R_SENTRY_TRACES_SAMPLE_RATE:-}
      - R2R_SENTRY_PROFILES_SAMPLE_RATE=${R2R_SENTRY_PROFILES_SAMPLE_RATE:-}

    # `$$` escapes Compose interpolation so the variables are expanded by the
    # container's shell at start-up. The Hatchet token is read from the shared
    # volume unless HATCHET_CLIENT_TOKEN is already set.
    command: >
      sh -c '
        if [ -z "$${HATCHET_CLIENT_TOKEN}" ]; then
          export HATCHET_CLIENT_TOKEN=$$(cat /hatchet_api_key/api_key.txt)
        fi
        exec uvicorn core.main.app_entry:app --host $${R2R_HOST} --port $${R2R_PORT}
      '
    volumes:
      # NOTE(review): when R2R_CONFIG_PATH is unset this bind-mounts the host
      # root directory at /app/config. It is mounted read-only so the container
      # cannot modify host files; consider requiring R2R_CONFIG_PATH instead.
      - ${R2R_CONFIG_PATH:-/}:${R2R_CONFIG_PATH:-/app/config}:ro
      - hatchet_api_key:/hatchet_api_key:ro
    extra_hosts:
      - host.docker.internal:host-gateway
    depends_on:
      - setup-token
      - unstructured
      - graph_clustering
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v3/health"]
      interval: 6s
      timeout: 5s
      retries: 5
      start_period: 30s
    deploy:
      replicas: ${R2R_REPLICAS:-3}
      restart_policy:
        condition: on-failure
      # Rolling update: one task at a time, new task started before the old
      # one is stopped.
      update_config:
        parallelism: 1
        delay: 30s
        order: start-first
      rollback_config:
        parallelism: 1
        delay: 30s

  # R2R web dashboard; container port 3000 is published on
  # R2R_DASHBOARD_PORT (default 7273).
  r2r-dashboard:
    image: sciphiai/r2r-dashboard:1.0.3
    ports:
      - "${R2R_DASHBOARD_PORT:-7273}:3000"
    environment:
      # URLs handed to the dashboard for the R2R API and the Hatchet dashboard.
      - NEXT_PUBLIC_R2R_DEPLOYMENT_URL=${R2R_DEPLOYMENT_URL:-http://localhost:7272}
      - NEXT_PUBLIC_HATCHET_DASHBOARD_URL=${HATCHET_DASHBOARD_URL:-http://localhost:${R2R_HATCHET_DASHBOARD_PORT:-7274}}
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure


================================================
FILE: docker/compose.full.yaml
================================================
# Named volumes. Each one is given an explicit name identical to its key.
volumes:
  # R2R data stores
  postgres_data:
    name: postgres_data
  minio_data:
    name: minio_data
  # Hatchet state shared between the hatchet-* services
  hatchet_postgres_data:
    name: hatchet_postgres_data
  hatchet_rabbitmq_data:
    name: hatchet_rabbitmq_data
  hatchet_rabbitmq_conf:
    name: hatchet_rabbitmq_conf
  hatchet_certs:
    name: hatchet_certs
  hatchet_config:
    name: hatchet_config
  # Written by setup-token, mounted read-only by r2r
  hatchet_api_key:
    name: hatchet_api_key

services:
  # pgvector-enabled Postgres; only started when the `postgres` profile is active.
  postgres:
    profiles:
      - postgres
    image: pgvector/pgvector:pg16
    command: ["postgres", "-c", "max_connections=1024"]
    restart: on-failure
    env_file:
      - ./env/postgres.env
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test:
        - "CMD-SHELL"
        - "pg_isready -U postgres"
      interval: 10s
      timeout: 5s
      retries: 5

  # MinIO object storage; only started when the `minio` profile is active.
  # Port 9000 is the API, 9001 the console (see --console-address).
  minio:
    profiles:
      - minio
    image: minio/minio
    command: ["server", "/data", "--console-address", ":9001"]
    restart: on-failure
    env_file:
      - ./env/minio.env
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio_data:/data
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:9000/minio/health/live"
      interval: 10s
      timeout: 5s
      retries: 5

  # Dedicated Postgres instance for Hatchet.
  # NOTE(review): the image tag is `latest`; presumably a new major release
  # could be incompatible with an existing data volume — consider pinning.
  hatchet-postgres:
    image: postgres:latest
    env_file:
      - ./env/hatchet.env
    volumes:
      - hatchet_postgres_data:/var/lib/postgresql/data
    healthcheck:
      # User and database must match POSTGRES_USER / POSTGRES_DB in hatchet.env.
      test:
        - "CMD-SHELL"
        - "pg_isready -U hatchet_user -d hatchet"
      interval: 10s
      timeout: 5s
      retries: 5


  # RabbitMQ broker for Hatchet. AMQP (5672) and the management UI (15672)
  # are published on host ports 5673 and 15673.
  hatchet-rabbitmq:
    image: "rabbitmq:3-management"
    hostname: "hatchet-rabbitmq"
    env_file:
      - ./env/hatchet.env
    ports:
      - "5673:5672"
      - "15673:15672"
    volumes:
      - hatchet_rabbitmq_data:/var/lib/rabbitmq
      # NOTE(review): a named volume is mounted at a file path here — confirm
      # this is intended.
      - hatchet_rabbitmq_conf:/etc/rabbitmq/rabbitmq.conf
    healthcheck:
      test:
        - "CMD"
        - "rabbitmqctl"
        - "status"
      interval: 10s
      timeout: 10s
      retries: 5

  # One-shot job that creates the Hatchet database if it does not exist yet
  # (see scripts/create-hatchet-db.sh).
  hatchet-create-db:
    image: postgres:latest
    command: sh /scripts/create-hatchet-db.sh
    volumes:
      - ./scripts:/scripts
    env_file:
      - ./env/hatchet.env
    # Without this dependency, starting a single service (for example
    # `docker compose up r2r`) would never start hatchet-postgres, and the
    # script's pg_isready loop would wait forever.
    depends_on:
      hatchet-postgres:
        condition: service_healthy

  # Hatchet schema migration; runs only after hatchet-create-db has exited
  # successfully.
  hatchet-migration:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-migrate:v0.53.15
    depends_on:
      hatchet-create-db:
        condition: service_completed_successfully
    env_file:
      - ./env/hatchet.env

  # Runs `hatchet-admin quickstart` to generate config into the shared
  # hatchet_config volume once migrations are done and RabbitMQ is healthy.
  hatchet-setup-config:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
    command:
      - /hatchet/hatchet-admin
      - quickstart
      - --skip
      - certs
      - --generated-config-dir
      - /hatchet/config
      - --overwrite=false
    depends_on:
      hatchet-migration:
        condition: service_completed_successfully
      hatchet-rabbitmq:
        condition: service_healthy
    env_file:
      - ./env/hatchet.env
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config

  # Hatchet engine; starts after hatchet-setup-config has completed and is
  # probed on :8733/live.
  hatchet-engine:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-engine:v0.53.15
    command: ["/hatchet/hatchet-engine", "--config", "/hatchet/config"]
    restart: on-failure
    env_file:
      - ./env/hatchet.env
    ports:
      - "7077:7077"
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
    depends_on:
      hatchet-setup-config:
        condition: service_completed_successfully
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "-q"
        - "-O"
        - "-"
        - "http://localhost:8733/live"
      interval: 10s
      timeout: 5s
      retries: 5

  # Hatchet web dashboard; container port 80 is published on host port 7274.
  hatchet-dashboard:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-dashboard:v0.53.15
    command: ["sh", "./entrypoint.sh", "--config", "/hatchet/config"]
    restart: on-failure
    env_file:
      - ./env/hatchet.env
    ports:
      - "7274:80"
    volumes:
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
    depends_on:
      hatchet-setup-config:
        condition: service_completed_successfully

  # One-shot job: scripts/setup-token.sh writes the Hatchet API token to the
  # hatchet_api_key volume, which r2r mounts read-only.
  setup-token:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-admin:v0.53.15
    command: ["sh", "/scripts/setup-token.sh"]
    depends_on:
      hatchet-setup-config:
        condition: service_completed_successfully
    volumes:
      - ./scripts:/scripts
      - hatchet_certs:/hatchet/certs
      - hatchet_config:/hatchet/config
      - hatchet_api_key:/hatchet_api_key

  # Unstructured service, health-checked on port 7275 (not published on the host).
  unstructured:
    image: ragtoriches/unst-prod
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7275/health"
      interval: 10s
      timeout: 5s
      retries: 5

  # Graph clustering service, published and health-checked on port 7276.
  graph_clustering:
    image: ragtoriches/cluster-prod
    ports:
      - "7276:7276"
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7276/health"
      interval: 10s
      timeout: 5s
      retries: 5

  # R2R API server (full mode). Starts via scripts/start-r2r.sh once the
  # Hatchet token exists and the unstructured / graph_clustering services
  # report healthy.
  r2r:
    image: sciphiai/r2r:latest
    command: ["sh", "/scripts/start-r2r.sh"]
    restart: on-failure
    env_file:
      - ./env/r2r-full.env
    ports:
      - "7272:7272"
    extra_hosts:
      - host.docker.internal:host-gateway
    volumes:
      - ./user_configs:/app/user_configs
      - ./user_tools:/app/user_tools
      - hatchet_api_key:/hatchet_api_key:ro
      - ./scripts:/scripts
    depends_on:
      setup-token:
        condition: service_completed_successfully
      unstructured:
        condition: service_healthy
      graph_clustering:
        condition: service_healthy
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7272/v3/health"
      interval: 6s
      timeout: 5s
      retries: 5

  # R2R web dashboard; container port 3000 is published on host port 7273.
  r2r-dashboard:
    image: sciphiai/r2r-dashboard:1.0.3
    ports:
      - "7273:3000"
    env_file:
      - ./env/r2r-dashboard.env


================================================
FILE: docker/compose.yaml
================================================
# Named volumes. Each one is given an explicit name identical to its key.
volumes:
  minio_data:
    name: minio_data
  postgres_data:
    name: postgres_data

services:
  # pgvector-enabled Postgres; only started when the `postgres` profile is active.
  postgres:
    profiles:
      - postgres
    image: pgvector/pgvector:pg16
    command: ["postgres", "-c", "max_connections=1024"]
    restart: on-failure
    env_file:
      - ./env/postgres.env
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test:
        - "CMD-SHELL"
        - "pg_isready -U postgres"
      interval: 10s
      timeout: 5s
      retries: 5

  # MinIO object storage; only started when the `minio` profile is active.
  # Port 9000 is the API, 9001 the console (see --console-address).
  minio:
    profiles:
      - minio
    image: minio/minio
    command: ["server", "/data", "--console-address", ":9001"]
    restart: on-failure
    env_file:
      - ./env/minio.env
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio_data:/data
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:9000/minio/health/live"
      interval: 10s
      timeout: 5s
      retries: 5

  # Graph clustering service, published and health-checked on port 7276.
  graph_clustering:
    image: ragtoriches/cluster-prod
    ports:
      - "7276:7276"
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7276/health"
      interval: 10s
      timeout: 5s
      retries: 5

  # R2R API server (light mode); configured from env/r2r.env and
  # health-checked on /v3/health.
  r2r:
    image: sciphiai/r2r:latest
    restart: on-failure
    env_file:
      - ./env/r2r.env
    ports:
      - "7272:7272"
    extra_hosts:
      - host.docker.internal:host-gateway
    volumes:
      - ./user_configs:/app/user_configs
      - ./user_tools:/app/user_tools
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:7272/v3/health"
      interval: 6s
      timeout: 5s
      retries: 5

  # R2R web dashboard; container port 3000 is published on host port 7273.
  r2r-dashboard:
    image: sciphiai/r2r-dashboard:1.0.3
    ports:
      - "7273:3000"
    env_file:
      - ./env/r2r-dashboard.env


================================================
FILE: docker/env/hatchet.env
================================================
# Environment shared by the hatchet-* services in compose.full.yaml
# (hatchet-postgres, hatchet-rabbitmq, hatchet-create-db, hatchet-migration,
# hatchet-setup-config, hatchet-engine, hatchet-dashboard).

# Connection string for the Hatchet database; user, password, host and
# database name repeat the values set individually below.
DATABASE_URL="postgres://hatchet_user:hatchet_password@hatchet-postgres:5432/hatchet?sslmode=disable"

# 134217728 bytes = 128 MiB
HATCHET_CLIENT_GRPC_MAX_RECV_MESSAGE_LENGTH=134217728
HATCHET_CLIENT_GRPC_MAX_SEND_MESSAGE_LENGTH=134217728

DATABASE_POSTGRES_PORT=5432
DATABASE_POSTGRES_HOST=hatchet-postgres
DATABASE_POSTGRES_USERNAME=hatchet_user
DATABASE_POSTGRES_PASSWORD=hatchet_password
HATCHET_DATABASE_POSTGRES_DB_NAME=hatchet
# POSTGRES_* must agree with the hatchet-postgres healthcheck
# (`pg_isready -U hatchet_user -d hatchet`).
POSTGRES_DB=hatchet
POSTGRES_USER=hatchet_user
POSTGRES_PASSWORD=hatchet_password

# The user:password pair in this URL must match RABBITMQ_DEFAULT_USER /
# RABBITMQ_DEFAULT_PASS at the bottom of this file.
SERVER_TASKQUEUE_RABBITMQ_URL=amqp://user:password@hatchet-rabbitmq:5672/
# Port 7274 is the host port published by hatchet-dashboard.
SERVER_AUTH_COOKIE_DOMAIN=http://host.docker.internal:7274
SERVER_URL=http://host.docker.internal:7274
SERVER_AUTH_COOKIE_INSECURE=t
SERVER_GRPC_BIND_ADDRESS=0.0.0.0
SERVER_GRPC_INSECURE=t
SERVER_GRPC_BROADCAST_ADDRESS=hatchet-engine:7077
SERVER_GRPC_MAX_MSG_SIZE=134217728
SERVER_GRPC_PORT="7077"

# NOTE(review): default credentials — change for any non-local deployment.
RABBITMQ_DEFAULT_USER=user
RABBITMQ_DEFAULT_PASS=password


================================================
FILE: docker/env/minio.env
================================================
# Root credentials for the `minio` service (loaded via env_file).
# NOTE(review): default credentials — change for any non-local deployment.
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin


================================================
FILE: docker/env/postgres.env
================================================
# Environment for the `postgres` (pgvector) service (loaded via env_file).
# NOTE(review): default credentials — change for any non-local deployment.
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
# The postgres service command also passes `-c max_connections=1024`.
POSTGRES_MAX_CONNECTIONS=1024
PGPORT=5432


================================================
FILE: docker/env/r2r-dashboard.env
================================================
# Environment for the `r2r-dashboard` service (loaded via env_file).
# 7272 and 7274 are the host ports published by r2r and hatchet-dashboard.
NEXT_PUBLIC_R2R_DEPLOYMENT_URL=http://localhost:7272
NEXT_PUBLIC_HATCHET_DASHBOARD_URL=http://localhost:7274
# NOTE(review): presumably pre-fills the dashboard login form — confirm.
NEXT_PUBLIC_R2R_DEFAULT_EMAIL="admin@example.com"
NEXT_PUBLIC_R2R_DEFAULT_PASSWORD="change_me_immediately"


================================================
FILE: docker/env/r2r-full.env
================================================
# Environment for the `r2r` service in compose.full.yaml (loaded via env_file).
# Empty values are placeholders to be filled in as needed.

# R2R
# R2R_PORT must stay in sync with the port mapping and healthcheck in
# compose.full.yaml, which are hard-coded to 7272.
R2R_PORT=7272
R2R_HOST=0.0.0.0
R2R_LOG_LEVEL=INFO
R2R_CONFIG_NAME=full
# For a custom config, use /app/user_configs/<config>.toml
# (docker/user_configs is mounted at /app/user_configs).
R2R_CONFIG_PATH=
R2R_PROJECT_NAME=r2r_default
R2R_SECRET_KEY=
# docker/user_tools is mounted at this path inside the container.
R2R_USER_TOOLS_PATH=/app/user_tools
R2R_LOG_FORMAT=

# Postgres Configuration
# Defaults match docker/env/postgres.env and the `postgres` compose service.
R2R_POSTGRES_USER=postgres
R2R_POSTGRES_PASSWORD=postgres
R2R_POSTGRES_HOST=postgres
R2R_POSTGRES_PORT=5432
R2R_POSTGRES_DBNAME=postgres
R2R_POSTGRES_MAX_CONNECTIONS=1024
R2R_POSTGRES_STATEMENT_CACHE_SIZE=100

# Hatchet
# HATCHET_CLIENT_TOKEN is not set here; scripts/start-r2r.sh reads it from
# /hatchet_api_key/api_key.txt when it is unset.
HATCHET_CLIENT_TLS_STRATEGY=none

# OpenAI
OPENAI_API_KEY=
OPENAI_API_BASE=

# Azure Foundry
AZURE_FOUNDRY_API_ENDPOINT=
AZURE_FOUNDRY_API_KEY=

# XAI / GROK
XAI_API_KEY=

# Anthropic
ANTHROPIC_API_KEY=

# Azure
AZURE_API_KEY=
AZURE_API_BASE=
AZURE_API_VERSION=

# Google Vertex AI
GOOGLE_APPLICATION_CREDENTIALS=
VERTEX_PROJECT=
VERTEX_LOCATION=

# Google Gemini
GEMINI_API_KEY=

# Mistral
MISTRAL_API_KEY=

# AWS Bedrock
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION_NAME=

# Groq
GROQ_API_KEY=

# Cohere
COHERE_API_KEY=

# Anyscale
ANYSCALE_API_KEY=

# Ollama
# host.docker.internal resolves to the Docker host (see extra_hosts on r2r).
OLLAMA_API_BASE=http://host.docker.internal:11434

# LM Studio
LM_STUDIO_API_BASE=http://host.docker.internal:1234
LM_STUDIO_API_KEY=1234

# Huggingface
HUGGINGFACE_API_BASE=http://host.docker.internal:8080
HUGGINGFACE_API_KEY=

# Unstructured
UNSTRUCTURED_API_KEY=
UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
# Points at the `unstructured` service defined in compose.full.yaml.
UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
UNSTRUCTURED_NUM_WORKERS=10

# Graphologic
# Points at the `graph_clustering` service defined in compose.full.yaml.
CLUSTERING_SERVICE_URL=http://graph_clustering:7276

# OAuth Credentials
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=
GOOGLE_REDIRECT_URI=

GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
GITHUB_REDIRECT_URI=

# Email
MAILERSEND_API_KEY=
SENDGRID_API_KEY=

# Websearch
FIRECRAWL_API_KEY=
SERPER_API_KEY=
TAVILY_API_KEY=

# Sentry Tracing
R2R_SENTRY_DSN=
R2R_SENTRY_ENVIRONMENT=
R2R_SENTRY_TRACES_SAMPLE_RATE=
R2R_SENTRY_PROFILES_SAMPLE_RATE=


================================================
FILE: docker/env/r2r.env
================================================
# Environment for the `r2r` service in compose.yaml (loaded via env_file).
# Empty values are placeholders to be filled in as needed.

# R2R
# R2R_PORT must stay in sync with the port mapping and healthcheck in
# compose.yaml, which are hard-coded to 7272.
R2R_PORT=7272
R2R_HOST=0.0.0.0
R2R_LOG_LEVEL=INFO
R2R_CONFIG_NAME=
# For a custom config, use /app/user_configs/<config>.toml
# (docker/user_configs is mounted at /app/user_configs).
R2R_CONFIG_PATH=
R2R_PROJECT_NAME=r2r_default
R2R_SECRET_KEY=
# docker/user_tools is mounted at this path inside the container.
R2R_USER_TOOLS_PATH=/app/user_tools
R2R_LOG_FORMAT=

# Postgres Configuration
# Defaults match docker/env/postgres.env and the `postgres` compose service.
R2R_POSTGRES_USER=postgres
R2R_POSTGRES_PASSWORD=postgres
R2R_POSTGRES_HOST=postgres
R2R_POSTGRES_PORT=5432
R2R_POSTGRES_DBNAME=postgres
R2R_POSTGRES_MAX_CONNECTIONS=1024
R2R_POSTGRES_STATEMENT_CACHE_SIZE=100

# Hatchet
# NOTE(review): compose.yaml defines no hatchet services — confirm whether
# this setting is used in light mode.
HATCHET_CLIENT_TLS_STRATEGY=none

# OpenAI
OPENAI_API_KEY=
OPENAI_API_BASE=

# Azure Foundry
AZURE_FOUNDRY_API_ENDPOINT=
AZURE_FOUNDRY_API_KEY=

# XAI / GROK
XAI_API_KEY=

# Anthropic
ANTHROPIC_API_KEY=

# Azure
AZURE_API_KEY=
AZURE_API_BASE=
AZURE_API_VERSION=

# Google Vertex AI
GOOGLE_APPLICATION_CREDENTIALS=
VERTEX_PROJECT=
VERTEX_LOCATION=

# Google Gemini
GEMINI_API_KEY=

# Mistral
MISTRAL_API_KEY=

# AWS Bedrock
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION_NAME=

# Groq
GROQ_API_KEY=

# Cohere
COHERE_API_KEY=

# Anyscale
ANYSCALE_API_KEY=

# Ollama
# host.docker.internal resolves to the Docker host (see extra_hosts on r2r).
OLLAMA_API_BASE=http://host.docker.internal:11434

# LM Studio
LM_STUDIO_API_BASE=http://host.docker.internal:1234
LM_STUDIO_API_KEY=1234

# Huggingface
HUGGINGFACE_API_BASE=http://host.docker.internal:8080
HUGGINGFACE_API_KEY=

# Unstructured
UNSTRUCTURED_API_KEY=
UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
# NOTE(review): compose.yaml defines no `unstructured` service, so this URL
# only resolves if such a service is provided separately.
UNSTRUCTURED_SERVICE_URL=http://unstructured:7275
UNSTRUCTURED_NUM_WORKERS=10

# Graphologic
# Points at the `graph_clustering` service defined in compose.yaml.
CLUSTERING_SERVICE_URL=http://graph_clustering:7276

# OAuth Credentials
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=
GOOGLE_REDIRECT_URI=

GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
GITHUB_REDIRECT_URI=

# Email
MAILERSEND_API_KEY=
SENDGRID_API_KEY=

# Websearch
FIRECRAWL_API_KEY=
SERPER_API_KEY=
TAVILY_API_KEY=

# Sentry Tracing
R2R_SENTRY_DSN=
R2R_SENTRY_ENVIRONMENT=
R2R_SENTRY_TRACES_SAMPLE_RATE=
R2R_SENTRY_PROFILES_SAMPLE_RATE=


================================================
FILE: docker/fluent-bit/fluent-bit.conf
================================================
# Fluent Bit pipeline: receive records over the forward protocol, parse the
# `log` field as JSON, and ship the result over HTTP as JSON lines.

[SERVICE]
    Flush        1
    Daemon       Off
    Log_Level    info
    Parsers_File parsers.conf

# Accept forward-protocol input on all interfaces, port 24224; every record
# is tagged `backend`.
[INPUT]
    Tag    backend
    Name   forward
    Listen 0.0.0.0
    Port   24224

# Run the `json` parser (defined in parsers.conf) over the `log` key.
[FILTER]
    Match    backend
    Name     parser
    Key_Name log
    Parser   json

# POST records as JSON lines to port 9428 on the Docker host.
# NOTE(review): the /insert/jsonline URI looks like a VictoriaLogs ingestion
# endpoint — confirm the intended receiver.
[OUTPUT]
    Match            backend
    Name             http
    host             host.docker.internal
    port             9428
    uri              /insert/jsonline?_stream_fields=log&_msg_field=msg,message&_time_field=date
    format           json_lines
    json_date_format iso8601


================================================
FILE: docker/fluent-bit/parsers.conf
================================================
# Parser referenced as `Parser json` by the [FILTER] section of fluent-bit.conf.
[PARSER]
    Name   json
    Format json


================================================
FILE: docker/scripts/create-hatchet-db.sh
================================================
#!/bin/bash
#
# Waits for the hatchet-postgres server to accept connections, then creates
# the Hatchet database if it does not exist yet.
#
# Optional environment variables (defaults in parentheses):
#   HATCHET_POSTGRES_USER      (hatchet_user)
#   HATCHET_POSTGRES_PASSWORD  (hatchet_password)
#   HATCHET_POSTGRES_DBNAME    (hatchet)

set -e

DB_HOST=hatchet-postgres
DB_PORT=5432
DB_USER="${HATCHET_POSTGRES_USER:-hatchet_user}"
DB_PASSWORD="${HATCHET_POSTGRES_PASSWORD:-hatchet_password}"
DB_NAME="${HATCHET_POSTGRES_DBNAME:-hatchet}"

echo 'Waiting for PostgreSQL to be ready...'
until pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER"; do
  sleep 1
done

echo 'PostgreSQL is ready, checking if database exists...'
# -A gives unaligned `name|owner|...` rows; only the first column is compared,
# and it must equal the database name exactly. Matching against the whole
# listing could hit the owner or privilege columns and wrongly skip creation.
if ! PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -lqtA \
    | cut -d '|' -f 1 \
    | grep -qxF -- "$DB_NAME"; then
  echo 'Database does not exist, creating it...'
  PGPASSWORD="$DB_PASSWORD" createdb -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -w "$DB_NAME"
else
  echo 'Database already exists, skipping creation.'
fi


================================================
FILE: docker/scripts/setup-token.sh
================================================
#!/bin/bash
#
# Creates a Hatchet API token with hatchet-admin and stores it in
# /hatchet_api_key/api_key.txt (a copy is kept in /tmp/hatchet_api_key for
# verification). Exits non-zero when no token could be extracted or the
# stored copy does not match.
#
# The script is started with `sh`, so it sticks to POSIX shell constructs.

set -e
echo 'Starting token creation process...'

# Decode one JWT segment. JWT segments are base64url-encoded with the
# trailing '=' padding removed, so the alphabet is translated back and the
# padding restored before handing the data to `base64 -d`.
b64url_decode() {
    seg=$(printf '%s' "$1" | tr '_-' '/+')
    case $(( ${#seg} % 4 )) in
        2) seg="${seg}==" ;;
        3) seg="${seg}=" ;;
    esac
    printf '%s' "$seg" | base64 -d
}

# Attempt to create token and capture both stdout and stderr.
# `|| true` keeps `set -e` from aborting here, so that a failure is reported
# together with the command output by the check below.
TOKEN_OUTPUT=$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id 707d0855-80ab-4e1f-a156-f1c4546cbf52 2>&1) || true

# Extract the token (assuming it's the only part that looks like a JWT).
# Only the first match is kept; a non-matching grep must not abort the script.
TOKEN=$(echo "$TOKEN_OUTPUT" | grep -Eo 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*' | head -n 1) || true

if [ -z "$TOKEN" ]; then
    echo 'Error: Failed to extract token. Full command output:' >&2
    echo "$TOKEN_OUTPUT" >&2
    exit 1
fi

echo "$TOKEN" > /tmp/hatchet_api_key
echo 'Token created and saved to /tmp/hatchet_api_key'

# Copy token to final destination (no trailing newline)
printf '%s' "$TOKEN" > /hatchet_api_key/api_key.txt
echo 'Token copied to /hatchet_api_key/api_key.txt'

# Verify token was copied correctly
if [ "$(cat /tmp/hatchet_api_key)" != "$(cat /hatchet_api_key/api_key.txt)" ]; then
    echo 'Error: Token copy failed, files do not match' >&2
    echo 'Content of /tmp/hatchet_api_key:'
    cat /tmp/hatchet_api_key
    echo 'Content of /hatchet_api_key/api_key.txt:'
    cat /hatchet_api_key/api_key.txt
    exit 1
fi

echo 'Hatchet API key has been saved successfully'
echo 'Token length:' "${#TOKEN}"
echo 'Token (first 20 chars):' "$(printf '%.20s' "$TOKEN")"
# NF is the number of dot-separated parts (3 for a well-formed JWT).
echo 'Token structure:' "$(echo "$TOKEN" | awk -F. '{print NF}')" 'parts'

# Check each part of the token
for i in 1 2 3; do
    PART=$(echo "$TOKEN" | cut -d. -f"$i")
    echo 'Part' "$i" 'length:' "${#PART}"
    echo 'Part' "$i" 'base64 check:' "$(b64url_decode "$PART" >/dev/null 2>&1 && echo 'Valid' || echo 'Invalid')"
done

# Final validation attempt (diagnostic only; also warns when jq is unavailable)
if ! b64url_decode "$(echo "$TOKEN" | cut -d. -f2)" 2>/dev/null | jq . >/dev/null 2>&1; then
    echo 'Warning: Token payload is not valid JSON when base64 decoded' >&2
else
    echo 'Token payload appears to be valid JSON'
fi


================================================
FILE: docker/scripts/start-r2r.sh
================================================
#!/bin/bash
#
# Entry point for the r2r container in compose.full.yaml: makes sure
# HATCHET_CLIENT_TOKEN is set, then replaces this shell with uvicorn.

HATCHET_TOKEN_FILE=/hatchet_api_key/api_key.txt

# Check if HATCHET_CLIENT_TOKEN is set, if not read it from the API key file
if [ -z "${HATCHET_CLIENT_TOKEN}" ]; then
  if [ -r "$HATCHET_TOKEN_FILE" ]; then
    HATCHET_CLIENT_TOKEN=$(cat "$HATCHET_TOKEN_FILE")
    export HATCHET_CLIENT_TOKEN
  else
    # Best effort: keep starting, but make the missing token visible in the logs.
    echo "Warning: HATCHET_CLIENT_TOKEN is unset and $HATCHET_TOKEN_FILE is not readable" >&2
  fi
fi

# Start the application. The fallbacks match docker/env/r2r-full.env, so an
# unset variable no longer produces an empty --host/--port argument.
exec uvicorn core.main.app_entry:app --host "${R2R_HOST:-0.0.0.0}" --port "${R2R_PORT:-7272}"


================================================
FILE: docker/user_configs/README.md
================================================
# User Configs Directory

## Overview
This directory is mounted inside the R2R Docker container and is intended for custom configuration files. Any files placed here will be accessible to the application running in the container.

## Usage
1. Place your custom configuration files in this directory.
2. Set the `R2R_CONFIG_PATH` in the `r2r.env` or `r2r-full.env` files.
3. The path format inside the container is: `/app/user_configs/<config>.toml`

## Configuration
The application uses the environment variable you set to locate your configuration file:
```
R2R_CONFIG_PATH=/app/user_configs/<config>.toml
```

If you want to use a different filename, update the `R2R_CONFIG_PATH` variable in your environment file to point to your custom file, for example:
```
R2R_CONFIG_PATH=/app/user_configs/my_custom_config.toml
```

## Troubleshooting
If you encounter configuration errors, check:
1. Your configuration file exists in this directory
2. The filename matches what's specified in `R2R_CONFIG_PATH`
3. The file has proper permissions (readable)
4. The file contains valid TOML syntax

For more detailed configuration information, see the main documentation.


================================================
FILE: docker/user_tools/README.md
================================================
# User-Defined Tools Directory

## Overview
This directory is mounted inside the R2R Docker container and is intended for custom tool files. Any files placed here will be accessible to the application running in the container.

## Usage
1. Place your custom tool definitions in this directory. Utilize the template structure demonstrated here.
2. Add any additional dependencies that you may need to the user_requirements.txt file in this directory.
3. Include the tool in your agent configuration.

## Creating a tool
```python
from core.base.agent.tools.base import Tool
from r2r import AggregateSearchResult


class ToolNameTool(Tool):
    """
    A user defined tool.
    """

    def __init__(self):
        super().__init__(
            name="tool_name",
            description="A natural language tool description that is shown to the agent.",
            parameters={
                "type": "object",
                "properties": {
                    "input_parameter": {
                        "type": "string",
                        "description": "Define any input parameters by their name and type",
                    },
                },
                "required": ["input_parameter"],
            },
            results_function=self.execute,
            llm_format_function=None,
        )

    async def execute(self, input_parameter: str, *args, **kwargs):
        """
        Implementation of the tool.

        Args:
            input_parameter: The value the agent supplies for the parameter
                declared in ``__init__``.

        Returns:
            An ``AggregateSearchResult`` wrapping the tool output.
        """

        # Any custom tool logic can go here.
        # `some_method` is a placeholder: replace it with your own logic.
        output_response = some_method(input_parameter)

        result = AggregateSearchResult(
            generic_tool_result=[output_response],
        )

        # Add to results collector if context is provided.
        # NOTE(review): assumes the agent exposes its context as
        # `self.context` — confirm against the Tool base class.
        context = getattr(self, "context", None)
        if context and hasattr(context, "search_results_collector"):
            context.search_results_collector.add_aggregate_result(result)

        return result
```

## Troubleshooting

For more detailed configuration information, see the main documentation.


================================================
FILE: docker/user_tools/user_requirements.txt
================================================


================================================
FILE: docs/README.md
================================================
# R2R Documentation

The most advanced AI retrieval system. Agentic Retrieval-Augmented Generation (RAG) with a RESTful API.

## Documentation Sections

### [Introduction](./introduction/)
- [System Overview](./introduction/system.md)
- [Guides](./introduction/guides/)

### [Documentation](./documentation/)
- [Getting Started](./documentation/README.md)
- [General Features](./documentation/general/)
- [Retrieval](./documentation/retrieval/)
- [Advanced Features](./documentation/advanced/)

### [API & SDKs](./api/)
- [API Reference](./api/)
- [SDK Documentation](./api/)

### [Cookbooks](./cookbooks/)
- [Data Processing](./cookbooks/data-processing/)
- [System Operations](./cookbooks/system-operations/)

### [Self-Hosting](./self-hosting/)
- [Installation](./self-hosting/getting-started/installation/)
- [Configuration](./self-hosting/configuration/)
- [Deployment](./self-hosting/deployment/)


================================================
FILE: docs/cookbooks/application.md
================================================
R2R offers an [open-source React+Next.js application](https://github.com/SciPhi-AI/R2R-Application) designed to give developers an administrative portal for their R2R deployment, and users an application to communicate with out of the box.

## Setup

### Install PNPM

PNPM is a fast, disk space-efficient package manager. To install PNPM, visit the [official PNPM installation page](https://pnpm.io/installation) or follow these instructions:

<AccordionGroup>

<Accordion icon="terminal" title="PNPM Installation">
For Unix-based systems (Linux, macOS):

```zsh
curl -fsSL https://get.pnpm.io/install.sh | sh -
```

For Windows:

```powershell
iwr https://get.pnpm.io/install.ps1 -useb | iex
```

After installation, you may need to add PNPM to your system's PATH.
</Accordion>

</AccordionGroup>

### Installing and Running the R2R Dashboard

If you're running R2R with Docker, you already have the R2R application running! Just navigate to [http://localhost:7273](http://localhost:7273).

If you're running R2R outside of Docker, run the following commands to install the R2R Dashboard.

1. Clone the project repository and navigate to the project directory:

```zsh
git clone https://github.com/SciPhi-AI/R2R-Application.git
cd R2R-Application
```

2. Install the project dependencies:

```zsh
pnpm install
```

3. Build and start the application for production:

```zsh
pnpm build
pnpm start
```

The dashboard will be available at [http://localhost:3000](http://localhost:3000).

## Features

### Login

To interact with R2R through the dashboard, you must first log in. If it's your first time logging in, use the default credentials shown.

By default, an R2R instance is hosted on port 7272. The login page will include this URL by default, but be sure to update the URL if your R2R instance is deployed elsewhere. For information about deploying a local R2R application server, see the [quickstart](/documentation/quickstart).

![R2R Dashboard Overview](./images/application/login.png)


### Documents

The documents page provides an overview of uploaded documents and their metadata. You can upload new documents and update, download, or delete existing ones. Additionally, you can view information about each document, including the documents' chunks and previews of PDFs.

![Documents Page](./images/application/oss_dashboard_documents.png)

### Collections

Collections allow users to create and share sets of documents. The collections page provides a place to manage your existing collections or create new collections.

![Collections Page](./images/application/oss_collections_page.png)

### Chat

In the chat page, you can stream RAG responses with different models and configurable settings. You can interact with both the RAG Agent and RAG endpoints here.

![Chat Interface](./images/application/chat.png)

### Users

Manage your users and gain insight into their interactions.

![Users Page](./images/application/users.png)

### Settings

The settings page allows you to view the configuration of and edit the prompts associated with your R2R deployment.

![Settings Page](./images/application/settings_config.png)
![Settings Page](./images/application/settings_prompts.png)

## Development

To develop the R2R dashboard:

1. Start the development server:

```zsh
pnpm dev
```

2. Run pre-commit checks (optional but recommended):

```zsh
pnpm format
pnpm lint
```


================================================
FILE: docs/cookbooks/custom-tools.md
================================================
There are many cases where it is helpful to define custom tools for the RAG Agent. R2R allows users to define custom tools and passes these definitions to the Agent at server start.

### Defining New Tools
There is a directory in the R2R repository, `/docker/user_tools`, which is mounted to the R2R docker container. It is here that we will place our custom tool files.

There, we will find a README.md file, which includes a template for our new tool:


```python
from r2r import Tool, AggregateSearchResult


class ToolNameTool(Tool):
    """
    Template for a user-defined tool.

    Copy this class, rename it, and replace the name, description,
    parameter schema, and the body of `execute` with your own.
    """

    def __init__(self):
        # The name, description, and JSON-schema `parameters` registered here
        # define the tool; the description is what is shown to the agent.
        super().__init__(
            name="tool_name",
            description="A natural language tool description that is shown to the agent.",
            parameters={
                "type": "object",
                "properties": {
                    "input_parameter": {
                        "type": "string",
                        "description": "Define any input parameters by their name and type",
                    },
                },
                "required": ["input_parameter"],
            },
            results_function=self.execute,
            llm_format_function=None,
        )

    async def execute(self, input_parameter: str, *args, **kwargs):
        """
        Implementation of the tool.

        Args:
            input_parameter: The value supplied by the agent for the
                `input_parameter` property declared in `__init__`.

        Returns:
            An AggregateSearchResult wrapping the tool's output.
        """

        # Any custom tool logic can go here.
        # `some_method` is a placeholder: replace it with your own logic.
        output_response = some_method(input_parameter)

        result = AggregateSearchResult(
            generic_tool_result=[output_response],
        )

        context = self.context
        # Add to results collector if context is provided
        if context and hasattr(context, "search_results_collector"):
            context.search_results_collector.add_aggregate_result(result)

        return result
```

This template has two basic methods:

1. `__init__` is where we define the tool. The description that we make here is shown to the agent.
2. `execute` is where we define any custom tool logic and interact with the inputs.

### Writing our new tool

Below, we have an example of a toy tool, which takes an integer and string input, returning a silly message to the agent. Should your tool require additional dependencies, be sure to include them in the `user_requirements.txt` file located in the `/docker` directory.

```python
from r2r import Tool, AggregateSearchResult


class SecretMethodTool(Tool):
    """
    A toy user-defined tool that returns a whimsical message to the agent.

    The tool is registered under the name "secret_method" and requires two
    inputs, `number` and `string`.
    """

    def __init__(self):
        # Register the tool's name, its description, and the JSON-schema
        # definition of the parameters it accepts.
        super().__init__(
            name="secret_method",
            description="Performs a secret method.",
            parameters={
                "type": "object",
                "properties": {
                    # NOTE(review): declared as "string" here, although the
                    # description and the `execute` annotation say integer;
                    # confirm which is intended.
                    "number": {
                        "type": "string",
                        "description": "An integer input for the secret method.",
                    },
                    "string": {
                        "type": "string",
                        "description": "A string input for the secret method.",
                    },
                },
                "required": ["number", "string"],
            },
            results_function=self.execute,
            llm_format_function=None,
        )

    async def execute(self, number: int, string: str, *args, **kwargs):
        """
        Build the message, wrap it in an AggregateSearchResult, and return it.

        Args:
            number: Value interpolated into the message as the flamingo count.
            string: Value interpolated into the message as the thing to prepare.

        Returns:
            An AggregateSearchResult whose `generic_tool_result` is the message.
        """

        output_response = f"Your order for {number} dancing flamingos has been received. They will arrive by unicycle courier within 3-5 business dreams. Please prepare {string} for them."

        result = AggregateSearchResult(
            generic_tool_result=output_response,
        )

        context = self.context
        # Also record the result on the context's results collector, when the
        # context is set and exposes one.
        if context and hasattr(context, "search_results_collector"):
            context.search_results_collector.add_aggregate_result(result)

        return result
```

Next, we modify our configuration file's `agent` section to include our new tool:

```toml
[agent]
rag_tools = ["secret_method"]
```


Finally, we can run the following and see that our agent called our new method, passed the required parameters, and understood its output:

```python
client.retrieval.agent(
    message={"role": "user", "content": "Can you run the secret method tool? Feel free to use any parameters you want. I just want to see the output."},
)
```

```zsh
results=AgentResponse(messages=[Message(role='assistant', content='The secret method tool produced the following output:\n\n"Your order for 42 dancing flamingos has been received. They will arrive by unicycle courier within 3-5 business dreams. Please prepare Hello, World! for them."\n\nThis whimsical response seems to be a playful and humorous output generated by the tool.', name=None, function_call=None, tool_calls=None, tool_call_id=None, metadata={'citations': [], 'tool_calls': [{'name': 'secret_method', 'args': '{"number":"42","string":"Hello, World!"}'}], 'aggregated_search_result': '[]'}, structured_content=None, image_url=None, image_data=None)], conversation_id='12ad2d6b-1429-48ea-9077-711726d8cfde')
```


================================================
FILE: docs/cookbooks/email.md
================================================
Configuring your deployment to require email verification helps keep your deployment secure, prevents unauthorized account creation,
reduces spam registrations, and ensures you have valid contact information for your users.

Currently, R2R has integrations for both [Mailersend](https://www.mailersend.com/) and [Sendgrid](https://sendgrid.com/).

## Setup
Both Mailersend and Sendgrid require registration, but do offer free tiers for evaluating their services. Create an account with your desired
provider, and generate an API key.

### Mailersend
  - [Create an account](https://www.mailersend.com/signup)
  - [Generate an API key](https://www.mailersend.com/help/managing-api-tokens)

### Sendgrid
  - [Create an account](https://twilio.com/signup)
  - [Generate an API key](https://www.twilio.com/docs/sendgrid/ui/account-and-settings/api-keys)

## Creating a Template
Once you have registered for an account with your email provider, you will want to create an email template. Providers will have pre-made templates, or you can build these from scratch.

![A Mailersend welcome template](./images/email/mailersend.png)


Once you save a template, make note of its template ID. You will need one ID per template, and these IDs go into the configuration file.

## Configuration Settings
We can then configure our deployment with the templates, redirect URL (`frontend_url`), and from email.

### Configuration File


```toml title="mailersend.toml"
[email]
provider = "mailersend"
verify_email_template_id=""
reset_password_template_id=""
password_changed_template_id=""
frontend_url=""
from_email=""
```

```toml title="sendgrid.toml"
[email]
provider = "sendgrid"
verify_email_template_id=""
reset_password_template_id=""
password_changed_template_id=""
frontend_url=""
from_email=""
```

### Environment Variables
You must set your provider's API key in your environment:

```zsh
export MAILERSEND_API_KEY=…
export SENDGRID_API_KEY=…
```


================================================
FILE: docs/cookbooks/evals.md
================================================
This guide demonstrates how to evaluate your R2R RAG outputs using the Ragas evaluation framework.

In this tutorial, you will:

- Prepare a sample dataset in R2R
- Use R2R's `/rag` endpoint to perform Retrieval-Augmented Generation
- Install and configure Ragas for evaluation
- Evaluate the generated responses using multiple metrics
- Analyze evaluation traces for deeper insights

## Setting Up Ragas for R2R Evaluation

### Installing Ragas
First, install Ragas and its dependencies:

```python
%pip install ragas langchain-openai -q
```

### Configuring Ragas with OpenAI
Ragas uses an LLM to perform evaluations. Set up an OpenAI model as the evaluator:

```python
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

# Make sure your OPENAI_API_KEY environment variable is set
llm = ChatOpenAI(model="gpt-4o-mini")
evaluator_llm = LangchainLLMWrapper(llm)

# If you'll be using embeddings for certain metrics
from langchain_openai import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
```

## Sample Dataset and R2R RAG Implementation

For this guide, we assume you have:
1. An initialized R2R client
2. A dataset about AI companies already ingested into R2R
3. Basic knowledge of R2R's RAG capabilities

Here's a quick example of using R2R's `/rag` endpoint to generate an answer:

```python
from r2r import R2RClient

client = R2RClient()  # Assuming R2R_API_KEY is set in your environment

query = "What makes Meta AI's LLaMA models stand out?"

search_settings = {
    "limit": 2,
    "graph_settings": {"enabled": False, "limit": 2},
}

response = client.retrieval.rag(
    query=query,
    search_settings=search_settings
)

print(response.results.generated_answer)
```

The output might look like:
```
Meta AI's LLaMA models stand out due to their open-source nature, which supports innovation and experimentation by making high-quality models accessible to researchers and developers [1]. This approach democratizes AI development, fostering collaboration across industries and enabling researchers without access to expensive resources to work with advanced AI models [2].
```

## Evaluating R2R with Ragas

Ragas provides a comprehensive evaluation framework specifically designed for RAG systems. The R2R-Ragas integration makes it easy to assess the quality of your R2R implementation.

### Creating a Test Dataset

First, prepare a set of test questions and reference answers:

```python
questions = [
    "Who are the major players in the large language model space?",
    "What is Microsoft's Azure AI platform known for?",
    "What kind of models does Cohere provide?",
]

references = [
    "The major players include OpenAI (GPT Series), Anthropic (Claude Series), Google DeepMind (Gemini Models), Meta AI (LLaMA Series), Microsoft Azure AI (integrating GPT Models), Amazon AWS (Bedrock with Claude and Jurassic), Cohere (business-focused models), and AI21 Labs (Jurassic Series).",
    "Microsoft's Azure AI platform is known for integrating OpenAI's GPT models, enabling businesses to use these models in a scalable and secure cloud environment.",
    "Cohere provides language models tailored for business use, excelling in tasks like search, summarization, and customer support.",
]
```

### Collecting R2R Responses

Generate responses using your R2R implementation:

```python
r2r_responses = []

search_settings = {
    "limit": 2,
    "graph_settings": {"enabled": False, "limit": 2},
}

for que in questions:
    response = client.retrieval.rag(query=que, search_settings=search_settings)
    r2r_responses.append(response)
```

### The R2R-Ragas Integration

Ragas includes a dedicated integration for R2R that handles the conversion of R2R's response format to Ragas's evaluation dataset format:

```python
from ragas.integrations.r2r import transform_to_ragas_dataset

# Convert R2R responses to Ragas format
ragas_eval_dataset = transform_to_ragas_dataset(
    user_inputs=questions,
    r2r_responses=r2r_responses,
    references=references
)

print(ragas_eval_dataset)
# Output: EvaluationDataset(features=['user_input', 'retrieved_contexts', 'response', 'reference'], len=3)
```

The `transform_to_ragas_dataset` function extracts the necessary components from R2R responses, including:
- The generated answer
- The retrieved context chunks
- Citation information

### Key Evaluation Metrics for R2R

Ragas offers several metrics that are particularly useful for evaluating R2R implementations:

```python
from ragas.metrics import AnswerRelevancy, ContextPrecision, Faithfulness
from ragas import evaluate

# Define the metrics to use
ragas_metrics = [
    AnswerRelevancy(llm=evaluator_llm),  # How relevant is the answer to the query?
    ContextPrecision(llm=evaluator_llm),  # How precisely were the right documents retrieved?
    Faithfulness(llm=evaluator_llm)       # Does the answer stick to facts in the context?
]

# Run the evaluation
results = evaluate(dataset=ragas_eval_dataset, metrics=ragas_metrics)
```

Each metric provides valuable insights:

- **Answer Relevancy**: Measures how well the R2R-generated response addresses the user's query
- **Context Precision**: Evaluates if R2R's retrieval mechanism is bringing back the most relevant documents
- **Faithfulness**: Checks if R2R's generated answers accurately reflect the information in the retrieved documents

### Interpreting Evaluation Results

The evaluation results show detailed scores for each sample and metric:

```python
# View results as a dataframe
df = results.to_pandas()
print(df)
```

Example output:
```
   user_input                                    retrieved_contexts                                           response                                          reference  answer_relevancy  context_precision  faithfulness
0  Who are the major players...                  [In the rapidly advancing field of...]                      The major players in the large language...         The major players include OpenAI...         1.000000              1.0     1.000000
1  What is Microsoft's Azure AI...              [Microsoft's Azure AI platform is famous for...]            Microsoft's Azure AI platform is known for...      Microsoft's Azure AI platform is...         0.948908              1.0     0.833333
2  What kind of models does Cohere provide?     [Cohere is well-known for its language models...]          Cohere provides language models tailored for...    Cohere provides language models...         0.903765              1.0     1.000000
```

### Advanced Visualization with Ragas App

For a more interactive analysis, upload results to the Ragas app:

```python
# Make sure RAGAS_APP_TOKEN is set in your environment
results.upload()
```

This generates a shareable dashboard with:
- Detailed scores per metric and sample
- Visual comparisons across metrics
- Trace information showing why scores were assigned
- Suggestions for improvement

You can examine:
- Which queries R2R handled well
- Where retrieval or generation could be improved
- Patterns in your RAG system's performance

## Advanced Evaluation Features

### Non-LLM Metrics for Fast Evaluation

In addition to LLM-based metrics, you can use non-LLM metrics for faster evaluations:

```python
from ragas.metrics import BleuScore

# Create a BLEU score metric
bleu_metric = BleuScore()

# Add it to your evaluation
quick_metrics = [bleu_metric]
quick_results = evaluate(dataset=ragas_eval_dataset, metrics=quick_metrics)
```

### Custom Evaluation Criteria with AspectCritic

For tailored evaluations specific to your use case, AspectCritic allows you to define custom evaluation criteria:

```python
from ragas.metrics import AspectCritic

# Define a custom evaluation aspect
custom_metric = AspectCritic(
    name="factual_accuracy",
    llm=evaluator_llm,
    definition="Verify if the answer accurately states company names, model names, and specific capabilities without any factual errors."
)

# Evaluate with your custom criteria
custom_results = evaluate(dataset=ragas_eval_dataset, metrics=[custom_metric])
```

### Training Your Own Metric

If you want to fine-tune metrics to your specific requirements:

1. Use the Ragas app to annotate evaluation results
2. Download the annotations as JSON
3. Train your custom metric:

```python
from ragas.config import InstructionConfig, DemonstrationConfig

# Embeddings are used for the demonstration config and the
# evaluator LLM for the instruction config.
demo_config = DemonstrationConfig(embedding=evaluator_embeddings)
inst_config = InstructionConfig(llm=evaluator_llm)

# Train your metric with your annotations.
# `custom_metric` is the AspectCritic instance created in the previous section.
custom_metric.train(
    path="your-annotations.json",
    demonstration_config=demo_config,
    instruction_config=inst_config
)
```

## Conclusion

This guide demonstrated how to use Ragas to thoroughly evaluate your R2R RAG implementation. By leveraging these evaluation tools, you can:

1. Measure the quality of your R2R system across multiple dimensions
2. Identify specific areas for improvement in retrieval and generation
3. Track performance improvements as you refine your implementation
4. Establish benchmarks for consistent quality

Through regular evaluation with Ragas, you can optimize your R2R configuration to deliver the most accurate, relevant, and helpful responses to your users.

For more information on R2R features, refer to the [R2R documentation](https://r2r-docs.sciphi.ai/). To explore additional evaluation metrics and techniques with Ragas, visit the [Ragas documentation](https://docs.ragas.io/).


================================================
FILE: docs/cookbooks/graphs.md
================================================
R2R allows you to build and analyze knowledge graphs from your documents through a collection-based architecture. The system extracts entities and relationships from documents, enabling richer search capabilities that understand connections between information.

The process works in several key stages:
- Documents are first ingested and entities/relationships are extracted
- Collections serve as containers for documents and their corresponding graphs
- Extracted information is pulled into the collection's graph
- Communities can be built to identify higher-level concepts
- The resulting graph enhances search with relationship-aware queries

Collections in R2R are flexible containers that support multiple documents and provide features for access control and graph management. A document can belong to multiple collections, allowing for different organizational schemes and sharing patterns.

The resulting knowledge graphs improve search accuracy by understanding relationships between concepts rather than just performing traditional document search.

<Steps>
### Ingestion and Extraction
Before we can extract entities and relationships from a document, we must ingest a file. After we've successfully ingested a file, we can `extract` the entities and relationships from the document.

In the following script, we fetch *The Gift of the Magi* by O. Henry and ingest it into our R2R server. We then begin the extraction process, which may take a few minutes to run.

```python
import requests
from r2r import R2RClient
import tempfile
import os

# Set up the client
client = R2RClient("http://localhost:7272")

# Fetch the text file
url = "https://www.gutenberg.org/cache/epub/7256/pg7256.txt"
response = requests.get(url, timeout=30)
response.raise_for_status()  # Stop early instead of ingesting an error page

# Create a temporary file
temp_dir = tempfile.gettempdir()
temp_file_path = os.path.join(temp_dir, "gift_of_the_magi.txt")
# Write as UTF-8 explicitly: the downloaded text may contain non-ASCII
# characters that the platform's default encoding cannot represent.
with open(temp_file_path, 'w', encoding='utf-8') as temp_file:
    temp_file.write(response.text)

# Ingest the file
ingest_response = client.documents.create(file_path=temp_file_path)
# The SDK returns a response object; read its fields as attributes
document_id = ingest_response.results.document_id

# Extract entities and relationships
extract_response = client.documents.extract(document_id)

# View extracted knowledge
entities = client.documents.list_entities(document_id)
relationships = client.documents.list_relationships(document_id)

# Clean up the temporary file
os.unlink(temp_file_path)
```

As this script runs, we see indications of successful ingestion and extraction.

<Frame
caption="Both ingestion and extraction were successful, as seen in the R2R Dashboard"
>
    <img src="../images/cookbooks/graphs/document_table_success.png" alt="Successful ingestion and extraction in the R2R dashboard." />
</Frame>

<Frame
caption="Some of the entities extracted from the document"
>
    <img src="../images/cookbooks/graphs/entity_view.png" alt="Viewing the entity in the dashboard." />
</Frame>

### Deduplication

If you would like to deduplicate the extracted entities, you can run the following method. To learn more about deduplication, view our [deduplication documentation here](/documentation/deduplication).

```python
from r2r import R2RClient

# Set up the client
client = R2RClient("http://localhost:7272")

client.documents.deduplicate("20e29a97-c53c-506d-b89c-1f5346befc58")
```

While the exact number of extracted entities and relationships will differ across models, this particular document produces approximately 120 entities, with only 20 distinct entities.

### Managing Collections

Graphs are built within a collection, allowing us to add many documents to a graph and to share our graphs with other users. When we ingested the file above, it was added to our default collection.

Each collection has a description which is used in the graph creation process. This can be set by the user, or generated using an LLM.

```python
from r2r import R2RClient

# Set up the client
client = R2RClient("http://localhost:7272")

# Update the description of the default collection
collection_id = "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"
update_result = client.collections.update(
    id=collection_id,
    generate_description=True, # LLM generated
)
```

<Frame
    caption="The LLM generated description for our collection"
>
    <img src="../images/cookbooks/graphs/collection_description.png" alt="The resulting description." />
</Frame>

### Pulling Extractions into the Graph

Our graph will not contain the extractions from our documents until we `pull` them into the graph. This gives developers more granular control over the creation and management of graphs.

Recall that we already extracted the entities and relationships for the graph; this means that we can `pull` a document into many graphs without having to rerun the extraction process.

```python
from r2r import R2RClient

# Set up the client
client = R2RClient("http://localhost:7272")

# Pull the extractions from all documents into the default collection
collection_id = "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"
client.graphs.pull(
    collection_id=collection_id
)
```

As soon as we `pull` the extractions into the graph, we can begin using the graph in our searches. We can confirm that the entities and relationships were pulled into the collection, as well.

<Frame
caption="Entities are `pulled` in from the document to the collection"
>
    <img src="../images/cookbooks/graphs/entity_view_collection.png" alt="Entities pulled from the document into the collection, shown in the R2R dashboard." />
</Frame>

<Frame
caption="The distribution of our entities across categories"
>
    <img src="../images/cookbooks/graphs/entity_visualization.png" alt="Entity distribution chart." />
</Frame>


### Building Communities

To further enhance our graph, we can build communities, which cluster the entities and relationships inside our graph. This allows us to capture higher-level concepts that exist within our data.

```python
from r2r import R2RClient

# Set up the client
client = R2RClient("http://localhost:7272")

# Build the communities for the default collection
collection_id = "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"
client.graphs.build(
    collection_id=collection_id
)
```

We can see that the resulting communities capture overall themes and concepts within the story.

<Frame
caption="The resulting communities, generated from the clustering process"
>
    <img src="../images/cookbooks/graphs/communities.png" alt="The communities generated for the collection." />
</Frame>


### Graph Search

Now that we have built our graph we can query over it. Good questions for graphs might require deep understanding of relationships and ideas that span across multiple documents.

```python
from r2r import R2RClient

# Set up the client
client = R2RClient("http://localhost:7272")

results = client.retrieval.search("""
    What items did Della and Jim each originally own,
    what did they do with those items, and what did they
    ultimately give each other?
    """,
    search_settings={
        "graph_settings": {"enabled": True},
    }
)
```

<Frame
    caption="Performing a multi-hop query over the graph"
>
    <img src="../images/cookbooks/graphs/graph_search.png" alt="Performing a search over the graph." />
</Frame>


================================================
FILE: docs/cookbooks/ingestion.md
================================================
R2R provides a powerful and flexible ingestion pipeline to process and manage various types of documents. It supports a wide range of file formats—text, documents, PDFs, images, audio, and even video—and transforms them into searchable, analyzable content. The ingestion process includes parsing, chunking, embedding, and optionally extracting entities and relationships for knowledge graph construction.

This cookbook will guide you through:

- Ingesting files, raw text, or pre-processed chunks
- Choosing an ingestion mode (`fast`, `hi-res`, `ocr`, or `custom`)
- Updating and deleting documents and chunks

For more on configuring ingestion, see the [Ingestion Configuration Overview](/self-hosting/configuration/ingestion).

### Supported File Types

R2R supports ingestion of the following document types:
| Category          | File types                                |
|-------------------|-------------------------------------------|
| Image             | `.bmp`, `.heic`, `.jpeg`, `.png`, `.tiff` |
| MP3               | `.mp3`                                    |
| PDF               | `.pdf`                                    |
| CSV               | `.csv`                                    |
| E-mail            | `.eml`, `.msg`, `.p7s`                    |
| EPUB              | `.epub`                                   |
| Excel             | `.xls`, `.xlsx`                           |
| HTML              | `.html`                                   |
| Markdown          | `.md`                                     |
| Org Mode          | `.org`                                    |
| Open Office       | `.odt`                                    |
| Plain text        | `.txt`                                    |
| PowerPoint        | `.ppt`, `.pptx`                           |
| reStructured Text | `.rst`                                    |
| Rich Text         | `.rtf`                                    |
| TSV               | `.tsv`                                    |
| Word              | `.doc`, `.docx`                           |
| Code              | `.py`, `.js`, `.ts`, `.css`               |

## Ingestion Modes

R2R offers four primary ingestion modes to tailor the process to your requirements:

- **`fast`**:
  A speed-oriented ingestion mode that prioritizes rapid processing with minimal enrichment. Summaries and some advanced parsing are skipped, making this ideal for quickly processing large volumes of documents.

- **`hi-res`**:
  A comprehensive, high-quality ingestion mode that may leverage multimodal foundation models (visual language models) for parsing complex documents and PDFs, even integrating image-based content.
  - On a **lite** deployment, R2R uses its built-in (`r2r`) parser.
  - On a **full** deployment, it can use `unstructured_local` or `unstructured_api` for more robust parsing and advanced features.
  Choose `hi-res` mode if you need the highest quality extraction, including image-to-text analysis and richer semantic segmentation.

- **`ocr`**:
  OCR mode utilizes optical character recognition models to convert PDFs to markdown. Currently, this mode requires use of Mistral OCR.

- **`custom`**:

Download .txt

gitextract_7stu15in/

├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── custom.md
│   │   └── feature_request.md
│   ├── actions/
│   │   ├── login-docker/
│   │   │   └── action.yml
│   │   ├── setup-docker/
│   │   │   └── action.yml
│   │   ├── setup-postgres-ext/
│   │   │   └── action.yml
│   │   ├── setup-python-full/
│   │   │   └── action.yml
│   │   ├── setup-python-light/
│   │   │   └── action.yml
│   │   ├── start-r2r-full/
│   │   │   └── action.yml
│   │   └── start-r2r-light/
│   │       └── action.yml
│   └── workflows/
│       ├── build-cluster-service-docker.yml
│       ├── build-r2r-docker.yml
│       ├── build-unst-service-docker.yml
│       ├── publish-to-npm.yml
│       ├── publish-to-pypi.yml
│       ├── quality.yml
│       ├── r2r-full-py-integration-tests.yml
│       ├── r2r-js-sdk-ci.yml
│       ├── r2r-js-sdk-integration-tests.yml
│       └── r2r-light-py-integration-tests.yml
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.md
├── MANIFEST.md
├── SECURITY.md
├── deployment/
│   └── k8s/
│       ├── kustomizations/
│       │   ├── helm-values_hatchet.yaml
│       │   ├── helm-values_postgresql.yaml
│       │   ├── include/
│       │   │   ├── cm-hatchet.yaml
│       │   │   ├── cm-hatchet_OLD.yaml
│       │   │   ├── cm-init-scripts-hatchet.yaml
│       │   │   ├── cm-init-scripts-r2r.yaml
│       │   │   ├── cm-r2r.yaml
│       │   │   ├── cm-unstructured.yaml
│       │   │   ├── hatchet-dashboard-initc.yaml
│       │   │   ├── hatchet-engine-initc.yaml
│       │   │   ├── hatchet-init-job.yaml
│       │   │   ├── hatchet-rabbitmq-sts.yaml
│       │   │   ├── pgadmin.yaml
│       │   │   ├── pgvector-sts.yaml
│       │   │   ├── r2r-dashboard-indep.yaml
│       │   │   ├── r2r-graph-clustering-indep.yaml
│       │   │   ├── r2r-initc.yaml
│       │   │   ├── r2r-nginx-indep.yaml
│       │   │   └── unstructured-indep.yaml
│       │   ├── kustomization.yaml
│       │   └── patches/
│       │       ├── hatchet-rabbitmq-sts.yaml
│       │       ├── rm-secret-hatchet-postgres.yaml
│       │       ├── rm-secret-hatchet-rabbitmq-config.yaml
│       │       ├── rm-secret-hatchet-rabbitmq.yaml
│       │       ├── rm-secret-hatchet-shared-config.yaml
│       │       └── service.yaml
│       └── manifests/
│           └── examples/
│               ├── externalsecret_hatchet.yaml
│               ├── externalsecret_r2r.yaml
│               ├── ingress-r2r.yaml
│               ├── secrets_hatchet.yaml
│               └── secrets_r2r.yaml
├── docker/
│   ├── compose.full.swarm.yaml
│   ├── compose.full.yaml
│   ├── compose.yaml
│   ├── env/
│   │   ├── hatchet.env
│   │   ├── minio.env
│   │   ├── postgres.env
│   │   ├── r2r-dashboard.env
│   │   ├── r2r-full.env
│   │   └── r2r.env
│   ├── fluent-bit/
│   │   ├── fluent-bit.conf
│   │   └── parsers.conf
│   ├── scripts/
│   │   ├── create-hatchet-db.sh
│   │   ├── setup-token.sh
│   │   └── start-r2r.sh
│   ├── user_configs/
│   │   └── README.md
│   └── user_tools/
│       ├── README.md
│       └── user_requirements.txt
├── docs/
│   ├── README.md
│   ├── cookbooks/
│   │   ├── application.md
│   │   ├── custom-tools.md
│   │   ├── email.md
│   │   ├── evals.md
│   │   ├── graphs.md
│   │   ├── ingestion.md
│   │   ├── local.md
│   │   ├── logging.md
│   │   ├── maintenance.md
│   │   ├── mcp.md
│   │   ├── orchestration.md
│   │   ├── structured-output.md
│   │   ├── web-dev.md
│   │   └── {README.md}
│   ├── documentation/
│   │   ├── README.md
│   │   ├── advanced/
│   │   │   ├── contextual-enrichment.md
│   │   │   └── deduplication.md
│   │   ├── general/
│   │   │   ├── collections.md
│   │   │   ├── conversations.md
│   │   │   ├── documents.md
│   │   │   ├── graphs.md
│   │   │   ├── prompts.md
│   │   │   └── users.md
│   │   └── retrieval/
│   │       ├── advanced-rag.md
│   │       ├── agentic-rag.md
│   │       ├── hybrid-search.md
│   │       └── search-and-rag.md
│   └── introduction/
│       ├── guides/
│       │   ├── rag.md
│       │   └── what-is-r2r.md
│       └── system.md
├── js/
│   ├── README.md
│   └── sdk/
│       ├── .prettierignore
│       ├── README.md
│       ├── __tests__/
│       │   ├── ChunksIntegrationSuperUser.test.ts
│       │   ├── CollectionsIntegrationSuperUser.test.ts
│       │   ├── ConversationsIntegrationSuperUser.test.ts
│       │   ├── ConversationsIntegrationUser.test.ts
│       │   ├── DocumentsAndCollectionsIntegrationUser.test.ts
│       │   ├── DocumentsIntegrationSuperUser.test.ts
│       │   ├── GraphsIntegrationSuperUser.test.ts
│       │   ├── PromptsIntegrationSuperUser.test.ts
│       │   ├── RetrievalIntegrationSuperUser.test.ts
│       │   ├── SystemIntegrationSuperUser.test.ts
│       │   ├── SystemIntegrationUser.test.ts
│       │   ├── UsersIntegrationSuperUser.test.ts
│       │   └── util/
│       │       └── typeTransformer.test.ts
│       ├── examples/
│       │   └── data/
│       │       ├── folder/
│       │       │   ├── karamozov.txt
│       │       │   └── myshkin.txt
│       │       ├── invalid.json
│       │       ├── marmeladov.txt
│       │       ├── raskolnikov.txt
│       │       ├── raskolnikov_2.txt
│       │       ├── sonia.txt
│       │       └── zametov.txt
│       ├── package.json
│       ├── src/
│       │   ├── baseClient.ts
│       │   ├── index.ts
│       │   ├── r2rClient.ts
│       │   ├── types.ts
│       │   ├── utils/
│       │   │   ├── index.ts
│       │   │   ├── typeTransformer.ts
│       │   │   └── utils.ts
│       │   └── v3/
│       │       └── clients/
│       │           ├── chunks.ts
│       │           ├── collections.ts
│       │           ├── conversations.ts
│       │           ├── documents.ts
│       │           ├── graphs.ts
│       │           ├── indices.ts
│       │           ├── prompts.ts
│       │           ├── retrieval.ts
│       │           ├── system.ts
│       │           └── users.ts
│       └── tsconfig.json
├── llms.txt
├── py/
│   ├── .dockerignore
│   ├── Dockerfile
│   ├── README.md
│   ├── all_possible_config.toml
│   ├── core/
│   │   ├── __init__.py
│   │   ├── agent/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── rag.py
│   │   │   └── research.py
│   │   ├── base/
│   │   │   ├── __init__.py
│   │   │   ├── abstractions/
│   │   │   │   └── __init__.py
│   │   │   ├── agent/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── agent.py
│   │   │   │   └── tools/
│   │   │   │       ├── built_in/
│   │   │   │       │   ├── get_file_content.py
│   │   │   │       │   ├── search_file_descriptions.py
│   │   │   │       │   ├── search_file_knowledge.py
│   │   │   │       │   ├── tavily_extract.py
│   │   │   │       │   ├── tavily_search.py
│   │   │   │       │   ├── web_scrape.py
│   │   │   │       │   └── web_search.py
│   │   │   │       └── registry.py
│   │   │   ├── api/
│   │   │   │   └── models/
│   │   │   │       └── __init__.py
│   │   │   ├── parsers/
│   │   │   │   ├── __init__.py
│   │   │   │   └── base_parser.py
│   │   │   ├── providers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── auth.py
│   │   │   │   ├── base.py
│   │   │   │   ├── crypto.py
│   │   │   │   ├── database.py
│   │   │   │   ├── email.py
│   │   │   │   ├── embedding.py
│   │   │   │   ├── file.py
│   │   │   │   ├── ingestion.py
│   │   │   │   ├── llm.py
│   │   │   │   ├── ocr.py
│   │   │   │   ├── orchestration.py
│   │   │   │   └── scheduler.py
│   │   │   └── utils/
│   │   │       └── __init__.py
│   │   ├── configs/
│   │   │   ├── full.toml
│   │   │   ├── full_azure.toml
│   │   │   ├── full_lm_studio.toml
│   │   │   ├── full_ollama.toml
│   │   │   ├── gemini.toml
│   │   │   ├── lm_studio.toml
│   │   │   ├── ollama.toml
│   │   │   ├── r2r_azure.toml
│   │   │   ├── r2r_azure_with_test_limits.toml
│   │   │   ├── r2r_with_auth.toml
│   │   │   └── tavily.toml
│   │   ├── examples/
│   │   │   ├── __init__.py
│   │   │   ├── data/
│   │   │   │   ├── aristotle.txt
│   │   │   │   ├── aristotle_v2.txt
│   │   │   │   ├── aristotle_v3.txt
│   │   │   │   ├── got.txt
│   │   │   │   ├── pg_essay_1.html
│   │   │   │   ├── pg_essay_2.html
│   │   │   │   ├── pg_essay_3.html
│   │   │   │   ├── pg_essay_4.html
│   │   │   │   ├── pg_essay_5.html
│   │   │   │   ├── test.txt
│   │   │   │   └── yc_companies.txt
│   │   │   ├── hello_r2r.ipynb
│   │   │   ├── hello_r2r.py
│   │   │   └── supported_file_types/
│   │   │       ├── css.css
│   │   │       ├── csv.csv
│   │   │       ├── doc.doc
│   │   │       ├── docx.docx
│   │   │       ├── eml.eml
│   │   │       ├── epub.epub
│   │   │       ├── heic.heic
│   │   │       ├── html.html
│   │   │       ├── js.js
│   │   │       ├── json.json
│   │   │       ├── md.md
│   │   │       ├── msg.msg
│   │   │       ├── odt.odt
│   │   │       ├── org.org
│   │   │       ├── p7s.p7s
│   │   │       ├── ppt.ppt
│   │   │       ├── pptx.pptx
│   │   │       ├── py.py
│   │   │       ├── rst.rst
│   │   │       ├── rtf.rtf
│   │   │       ├── tiff.tiff
│   │   │       ├── ts.ts
│   │   │       ├── tsv.tsv
│   │   │       ├── txt.txt
│   │   │       ├── xls.xls
│   │   │       └── xlsx.xlsx
│   │   ├── main/
│   │   │   ├── __init__.py
│   │   │   ├── abstractions.py
│   │   │   ├── api/
│   │   │   │   └── v3/
│   │   │   │       ├── base_router.py
│   │   │   │       ├── chunks_router.py
│   │   │   │       ├── collections_router.py
│   │   │   │       ├── conversations_router.py
│   │   │   │       ├── documents_router.py
│   │   │   │       ├── graph_router.py
│   │   │   │       ├── indices_router.py
│   │   │   │       ├── prompts_router.py
│   │   │   │       ├── retrieval_router.py
│   │   │   │       ├── system_router.py
│   │   │   │       └── users_router.py
│   │   │   ├── app.py
│   │   │   ├── app_entry.py
│   │   │   ├── assembly/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── builder.py
│   │   │   │   ├── factory.py
│   │   │   │   └── utils.py
│   │   │   ├── config.py
│   │   │   ├── middleware/
│   │   │   │   ├── __init__.py
│   │   │   │   └── project_schema.py
│   │   │   ├── orchestration/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hatchet/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── graph_workflow.py
│   │   │   │   │   └── ingestion_workflow.py
│   │   │   │   └── simple/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── graph_workflow.py
│   │   │   │       └── ingestion_workflow.py
│   │   │   └── services/
│   │   │       ├── __init__.py
│   │   │       ├── auth_service.py
│   │   │       ├── base.py
│   │   │       ├── graph_service.py
│   │   │       ├── ingestion_service.py
│   │   │       ├── maintenance_service.py
│   │   │       ├── management_service.py
│   │   │       └── retrieval_service.py
│   │   ├── parsers/
│   │   │   ├── __init__.py
│   │   │   ├── media/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_parser.py
│   │   │   │   ├── bmp_parser.py
│   │   │   │   ├── doc_parser.py
│   │   │   │   ├── docx_parser.py
│   │   │   │   ├── img_parser.py
│   │   │   │   ├── odt_parser.py
│   │   │   │   ├── pdf_parser.py
│   │   │   │   ├── ppt_parser.py
│   │   │   │   ├── pptx_parser.py
│   │   │   │   └── rtf_parser.py
│   │   │   ├── structured/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── csv_parser.py
│   │   │   │   ├── eml_parser.py
│   │   │   │   ├── epub_parser.py
│   │   │   │   ├── json_parser.py
│   │   │   │   ├── msg_parser.py
│   │   │   │   ├── org_parser.py
│   │   │   │   ├── p7s_parser.py
│   │   │   │   ├── rst_parser.py
│   │   │   │   ├── tsv_parser.py
│   │   │   │   ├── xls_parser.py
│   │   │   │   └── xlsx_parser.py
│   │   │   └── text/
│   │   │       ├── __init__.py
│   │   │       ├── css_parser.py
│   │   │       ├── html_parser.py
│   │   │       ├── js_parser.py
│   │   │       ├── md_parser.py
│   │   │       ├── python_parser.py
│   │   │       ├── text_parser.py
│   │   │       └── ts_parser.py
│   │   ├── providers/
│   │   │   ├── __init__.py
│   │   │   ├── auth/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clerk.py
│   │   │   │   ├── jwt.py
│   │   │   │   ├── r2r_auth.py
│   │   │   │   └── supabase.py
│   │   │   ├── crypto/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bcrypt.py
│   │   │   │   └── nacl.py
│   │   │   ├── database/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── chunks.py
│   │   │   │   ├── collections.py
│   │   │   │   ├── conversations.py
│   │   │   │   ├── documents.py
│   │   │   │   ├── filters.py
│   │   │   │   ├── graphs.py
│   │   │   │   ├── limits.py
│   │   │   │   ├── maintenance.py
│   │   │   │   ├── postgres.py
│   │   │   │   ├── prompts/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── chunk_enrichment.yaml
│   │   │   │   │   ├── collection_summary.yaml
│   │   │   │   │   ├── dynamic_rag_agent.yaml
│   │   │   │   │   ├── dynamic_rag_agent_xml_tooling.yaml
│   │   │   │   │   ├── graph_communities.yaml
│   │   │   │   │   ├── graph_entity_description.yaml
│   │   │   │   │   ├── graph_extraction.yaml
│   │   │   │   │   ├── hyde.yaml
│   │   │   │   │   ├── rag.yaml
│   │   │   │   │   ├── rag_fusion.yaml
│   │   │   │   │   ├── static_rag_agent.yaml
│   │   │   │   │   ├── static_research_agent.yaml
│   │   │   │   │   ├── summary.yaml
│   │   │   │   │   ├── system.yaml
│   │   │   │   │   ├── vision_img.yaml
│   │   │   │   │   └── vision_pdf.yaml
│   │   │   │   ├── prompts_handler.py
│   │   │   │   ├── tokens.py
│   │   │   │   ├── users.py
│   │   │   │   └── utils.py
│   │   │   ├── email/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── console_mock.py
│   │   │   │   ├── mailersend.py
│   │   │   │   ├── sendgrid.py
│   │   │   │   └── smtp.py
│   │   │   ├── embeddings/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── litellm.py
│   │   │   │   ├── ollama.py
│   │   │   │   ├── openai.py
│   │   │   │   └── utils.py
│   │   │   ├── file/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── postgres.py
│   │   │   │   └── s3.py
│   │   │   ├── ingestion/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── r2r/
│   │   │   │   │   └── base.py
│   │   │   │   └── unstructured/
│   │   │   │       └── base.py
│   │   │   ├── llm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── anthropic.py
│   │   │   │   ├── azure_foundry.py
│   │   │   │   ├── litellm.py
│   │   │   │   ├── openai.py
│   │   │   │   ├── r2r_llm.py
│   │   │   │   └── utils.py
│   │   │   ├── ocr/
│   │   │   │   ├── __init__.py
│   │   │   │   └── mistral.py
│   │   │   ├── orchestration/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hatchet.py
│   │   │   │   └── simple.py
│   │   │   └── scheduler/
│   │   │       ├── __init__.py
│   │   │       └── apscheduler.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── context.py
│   │       ├── logging_config.py
│   │       ├── sentry.py
│   │       └── serper.py
│   ├── migrations/
│   │   ├── README
│   │   ├── alembic.ini
│   │   ├── env.py
│   │   ├── script.py.mako
│   │   └── versions/
│   │       ├── 2fac23e4d91b_migrate_to_document_search.py
│   │       ├── 3efc7b3b1b3d_add_total_tokens_count.py
│   │       ├── 7eb70560f406_add_limits_overrides_to_users.py
│   │       ├── 8077140e1e99_v3_api_database_revision.py
│   │       ├── c45a9cf6a8a4_add_user_and_document_count_to_.py
│   │       └── d342e632358a_migrate_to_asyncpg.py
│   ├── pyproject.toml
│   ├── r2r/
│   │   ├── __init__.py
│   │   ├── mcp.py
│   │   ├── r2r.toml
│   │   └── serve.py
│   ├── sdk/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── asnyc_methods/
│   │   │   ├── __init__.py
│   │   │   ├── chunks.py
│   │   │   ├── collections.py
│   │   │   ├── conversations.py
│   │   │   ├── documents.py
│   │   │   ├── graphs.py
│   │   │   ├── indices.py
│   │   │   ├── prompts.py
│   │   │   ├── retrieval.py
│   │   │   ├── system.py
│   │   │   └── users.py
│   │   ├── async_client.py
│   │   ├── base/
│   │   │   ├── __init_.py
│   │   │   └── base_client.py
│   │   ├── models.py
│   │   ├── sync_client.py
│   │   └── sync_methods/
│   │       ├── __init__.py
│   │       ├── chunks.py
│   │       ├── collections.py
│   │       ├── conversations.py
│   │       ├── documents.py
│   │       ├── graphs.py
│   │       ├── indices.py
│   │       ├── prompts.py
│   │       ├── retrieval.py
│   │       ├── system.py
│   │       └── users.py
│   ├── shared/
│   │   ├── __init__.py
│   │   ├── abstractions/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── document.py
│   │   │   ├── exception.py
│   │   │   ├── graph.py
│   │   │   ├── llm.py
│   │   │   ├── prompt.py
│   │   │   ├── search.py
│   │   │   ├── tool.py
│   │   │   ├── user.py
│   │   │   └── vector.py
│   │   ├── api/
│   │   │   └── models/
│   │   │       ├── __init__.py
│   │   │       ├── auth/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── base.py
│   │   │       ├── graph/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── ingestion/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       ├── management/
│   │   │       │   ├── __init__.py
│   │   │       │   └── responses.py
│   │   │       └── retrieval/
│   │   │           ├── __init__.py
│   │   │           └── responses.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── base_utils.py
│   │       └── splitter/
│   │           ├── __init__.py
│   │           └── text.py
│   └── tests/
│       ├── integration/
│       │   ├── conftest.py
│       │   ├── test_agent.py
│       │   ├── test_base.py
│       │   ├── test_chunks.py
│       │   ├── test_collections.py
│       │   ├── test_collections_users_interaction.py
│       │   ├── test_conversations.py
│       │   ├── test_documents.py
│       │   ├── test_filters.py
│       │   ├── test_graphs.py
│       │   ├── test_indices.py
│       │   ├── test_ingestion.py
│       │   ├── test_retrieval.py
│       │   ├── test_retrieval_advanced.py
│       │   ├── test_system.py
│       │   └── test_users.py
│       ├── scaling/
│       │   ├── __init__.py
│       │   └── loadTester.py
│       └── unit/
│           ├── agent/
│           │   ├── test_agent.py
│           │   ├── test_agent_citations.py
│           │   ├── test_agent_citations_old.py
│           │   ├── test_agent_old.py
│           │   └── test_streaming_agent.py
│           ├── app/
│           │   ├── test_config.py
│           │   └── test_routes.py
│           ├── conftest.py
│           ├── database/
│           │   ├── test_collections.py
│           │   ├── test_conversations.py
│           │   ├── test_graphs.py
│           │   └── test_limits.py
│           ├── document/
│           │   ├── test_chunks.py
│           │   ├── test_document_processing.py
│           │   └── test_documents.py
│           └── retrieval/
│               ├── __init__.py
│               ├── conftest.py
│               ├── test_citations.py
│               ├── test_database_filters.py
│               ├── test_rag_processing.py
│               └── test_retrieval_old.py
└── services/
    ├── README.md
    ├── clustering/
    │   ├── Dockerfile.clustering
    │   └── main.py
    └── unstructured/
        ├── Dockerfile.unstructured
        ├── README.md
        └── main.py

Download .txt

Showing preview only (245K chars total). Download the full file or copy to clipboard to get everything.

SYMBOL INDEX (2934 symbols across 243 files)

FILE: js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts
  constant TEST_OUTPUT_DIR (line 5) | const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");

FILE: js/sdk/__tests__/ConversationsIntegrationSuperUser.test.ts
  constant TEST_OUTPUT_DIR (line 7) | const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");

FILE: js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts
  constant TEST_OUTPUT_DIR (line 7) | const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");
  function findOverlap (line 484) | function findOverlap(str1: string, str2: string): string {

FILE: js/sdk/__tests__/GraphsIntegrationSuperUser.test.ts
  constant TEST_OUTPUT_DIR (line 7) | const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");

FILE: js/sdk/__tests__/RetrievalIntegrationSuperUser.test.ts
  function readStream (line 26) | async function readStream(

FILE: js/sdk/__tests__/UsersIntegrationSuperUser.test.ts
  constant TEST_OUTPUT_DIR (line 7) | const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");

FILE: js/sdk/src/baseClient.ts
  function handleRequestError (line 15) | function handleRequestError(response: AxiosResponse): void {
  method constructor (line 46) | constructor(
  method _makeRequest (line 69) | protected async _makeRequest<T = any>(
  method handleStreamingRequest (line 185) | private async handleStreamingRequest<T>(
  method _ensureAuthenticated (line 238) | protected _ensureAuthenticated(): void {
  method setTokens (line 244) | setTokens(accessToken: string, refreshToken: string): void {
  method setApiKey (line 249) | setApiKey(apiKey: string): void {
  method setProjectName (line 256) | setProjectName(projectName: string): void {
  method unsetProjectName (line 263) | unsetProjectName(): void {

FILE: js/sdk/src/r2rClient.ts
  type RefreshTokenResponse (line 20) | type RefreshTokenResponse = {
  type R2RClientOptions (line 27) | interface R2RClientOptions {
  class r2rClient (line 40) | class r2rClient extends BaseClient {
    method constructor (line 56) | constructor(
    method setupResponseInterceptor (line 105) | private setupResponseInterceptor() {
    method makeRequest (line 190) | public makeRequest<T = any>(
    method getRefreshToken (line 198) | public getRefreshToken(): string | null {
    method setTokens (line 202) | public setTokens(

FILE: js/sdk/src/types.ts
  type UnprocessedChunk (line 1) | interface UnprocessedChunk {
  type ResultsWrapper (line 10) | interface ResultsWrapper<T> {
  type PaginatedResultsWrapper (line 14) | interface PaginatedResultsWrapper<T> extends ResultsWrapper<T> {
  type GenericBooleanResponse (line 19) | interface GenericBooleanResponse {
  type GenericMessageResponse (line 23) | interface GenericMessageResponse {
  type ChunkResponse (line 28) | interface ChunkResponse {
  type CollectionResponse (line 39) | interface CollectionResponse {
  type CommunityResponse (line 53) | interface CommunityResponse {
  type ConversationResponse (line 67) | interface ConversationResponse {
  type Message (line 74) | interface Message {
  type MessageResponse (line 84) | interface MessageResponse {
  type DocumentResponse (line 90) | interface DocumentResponse {
  type EntityResponse (line 109) | interface EntityResponse {
  type GraphResponse (line 121) | interface GraphResponse {
  type IndexMeasure (line 132) | enum IndexMeasure {
  type IngestionResponse (line 139) | interface IngestionResponse {
  type UpdateResponse (line 145) | interface UpdateResponse {
  type IndexConfig (line 151) | interface IndexConfig {
  type PromptResponse (line 163) | interface PromptResponse {
  type RelationshipResponse (line 173) | interface RelationshipResponse {
  type ChunkSearchSettings (line 188) | interface ChunkSearchSettings {
  type GenerationConfig (line 195) | interface GenerationConfig {
  type HybridSearchSettings (line 211) | interface HybridSearchSettings {
  type GraphSearchSettings (line 218) | interface GraphSearchSettings {
  type SearchSettings (line 228) | interface SearchSettings {
  type VectorSearchResult (line 243) | interface VectorSearchResult {
  type KGSearchResultType (line 253) | type KGSearchResultType =
  type GraphSearchResult (line 259) | interface GraphSearchResult {
  type CombinedSearchResponse (line 267) | interface CombinedSearchResponse {
  type ServerStats (line 276) | interface ServerStats {
  type SettingsResponse (line 283) | interface SettingsResponse {
  type TokenType (line 291) | type TokenType = "access" | "refresh";
  type Token (line 293) | interface Token {
  type TokenResponse (line 298) | interface TokenResponse {
  type User (line 303) | interface User {
  type LoginResponse (line 322) | interface LoginResponse {
  type StorageTypeLimit (line 327) | interface StorageTypeLimit {
  type StorageLimits (line 333) | interface StorageLimits {
  type UsageLimit (line 339) | interface UsageLimit {
  type RouteUsage (line 345) | interface RouteUsage {
  type Usage (line 350) | interface Usage {
  type SystemDefaults (line 356) | interface SystemDefaults {
  type LimitsResponse (line 362) | interface LimitsResponse {
  type WrappedBooleanResponse (line 371) | type WrappedBooleanResponse = ResultsWrapper<GenericBooleanResponse>;
  type WrappedGenericMessageResponse (line 372) | type WrappedGenericMessageResponse =
  type WrappedChunkResponse (line 376) | type WrappedChunkResponse = ResultsWrapper<ChunkResponse>;
  type WrappedChunksResponse (line 377) | type WrappedChunksResponse = PaginatedResultsWrapper<ChunkResponse[]>;
  type WrappedCollectionResponse (line 380) | type WrappedCollectionResponse = ResultsWrapper<CollectionResponse>;
  type WrappedCollectionsResponse (line 381) | type WrappedCollectionsResponse = PaginatedResultsWrapper<
  type WrappedCommunityResponse (line 386) | type WrappedCommunityResponse = ResultsWrapper<CommunityResponse>;
  type WrappedCommunitiesResponse (line 387) | type WrappedCommunitiesResponse = PaginatedResultsWrapper<
  type WrappedConversationMessagesResponse (line 392) | type WrappedConversationMessagesResponse = ResultsWrapper<
  type WrappedConversationResponse (line 395) | type WrappedConversationResponse =
  type WrappedConversationsResponse (line 397) | type WrappedConversationsResponse = PaginatedResultsWrapper<
  type WrappedMessageResponse (line 400) | type WrappedMessageResponse = ResultsWrapper<MessageResponse>;
  type WrappedMessagesResponse (line 401) | type WrappedMessagesResponse = PaginatedResultsWrapper<
  type WrappedDocumentResponse (line 406) | type WrappedDocumentResponse = ResultsWrapper<DocumentResponse>;
  type WrappedDocumentsResponse (line 407) | type WrappedDocumentsResponse = PaginatedResultsWrapper<
  type WrappedEntityResponse (line 412) | type WrappedEntityResponse = ResultsWrapper<EntityResponse>;
  type WrappedEntitiesResponse (line 413) | type WrappedEntitiesResponse = PaginatedResultsWrapper<EntityResponse[]>;
  type WrappedGraphResponse (line 416) | type WrappedGraphResponse = ResultsWrapper<GraphResponse>;
  type WrappedGraphsResponse (line 417) | type WrappedGraphsResponse = PaginatedResultsWrapper<GraphResponse[]>;
  type WrappedIngestionResponse (line 420) | type WrappedIngestionResponse = ResultsWrapper<IngestionResponse>;
  type WrappedMetadataUpdateResponse (line 421) | type WrappedMetadataUpdateResponse = ResultsWrapper<IngestionResponse>;
  type WrappedUpdateResponse (line 422) | type WrappedUpdateResponse = ResultsWrapper<UpdateResponse>;
  type WrappedVectorIndicesResponse (line 423) | type WrappedVectorIndicesResponse = ResultsWrapper<IndexConfig[]>;
  type WrappedPromptResponse (line 426) | type WrappedPromptResponse = ResultsWrapper<PromptResponse>;
  type WrappedPromptsResponse (line 427) | type WrappedPromptsResponse = PaginatedResultsWrapper<PromptResponse[]>;
  type WrappedRelationshipResponse (line 430) | type WrappedRelationshipResponse = ResultsWrapper<RelationshipResponse>;
  type WrappedRelationshipsResponse (line 431) | type WrappedRelationshipsResponse = PaginatedResultsWrapper<
  type WrappedVectorSearchResponse (line 436) | type WrappedVectorSearchResponse = ResultsWrapper<VectorSearchResult[]>;
  type WrappedSearchResponse (line 437) | type WrappedSearchResponse = ResultsWrapper<CombinedSearchResponse>;
  type WrappedEmbeddingResponse (line 438) | type WrappedEmbeddingResponse = ResultsWrapper<number[]>;
  type WrappedSettingsResponse (line 441) | type WrappedSettingsResponse = ResultsWrapper<SettingsResponse>;
  type WrappedServerStatsResponse (line 442) | type WrappedServerStatsResponse = ResultsWrapper<ServerStats>;
  type WrappedTokenResponse (line 445) | type WrappedTokenResponse = ResultsWrapper<TokenResponse>;
  type WrappedUserResponse (line 446) | type WrappedUserResponse = ResultsWrapper<User>;
  type WrappedUsersResponse (line 447) | type WrappedUsersResponse = PaginatedResultsWrapper<User[]>;
  type WrappedLimitsResponse (line 448) | type WrappedLimitsResponse = ResultsWrapper<LimitsResponse>;
  type WrappedLoginResponse (line 449) | type WrappedLoginResponse = ResultsWrapper<LoginResponse>;
  type R2RResults (line 454) | interface R2RResults<T> {
  type PaginatedR2RResult (line 463) | interface PaginatedR2RResult<T> extends R2RResults<T> {
  type ApiKey (line 477) | interface ApiKey {
  type ApiKeyNoPriv (line 489) | interface ApiKeyNoPriv {
  type WrappedAPIKeyResponse (line 499) | type WrappedAPIKeyResponse = R2RResults<ApiKey>;
  type WrappedAPIKeysResponse (line 504) | type WrappedAPIKeysResponse = PaginatedR2RResult<ApiKeyNoPriv[]>;
  type DocumentSearchResult (line 507) | interface DocumentSearchResult {
  type PaginatedResultsWrapper (line 527) | interface PaginatedResultsWrapper<T> {
  type WrappedDocumentSearchResponse (line 533) | type WrappedDocumentSearchResponse = PaginatedResultsWrapper<

FILE: js/sdk/src/utils/typeTransformer.ts
  type CamelCase (line 4) | type CamelCase<S extends string> = S extends `${infer P}_${infer Q}`
  type CamelCaseKeys (line 11) | type CamelCaseKeys<T> = {
  type SnakeCase (line 25) | type SnakeCase<S extends string> = S extends `${infer T}${infer U}`
  type SnakeCaseKeys (line 34) | type SnakeCaseKeys<T> = {
  function ensureCamelCase (line 101) | function ensureCamelCase<T>(input: T): CamelCaseKeys<T> {
  function ensureSnakeCase (line 158) | function ensureSnakeCase<T>(input: T): SnakeCaseKeys<T> {

FILE: js/sdk/src/utils/utils.ts
  function downloadBlob (line 1) | function downloadBlob(blob: Blob, filename: string): void {

FILE: js/sdk/src/v3/clients/chunks.ts
  class ChunksClient (line 10) | class ChunksClient {
    method constructor (line 11) | constructor(private client: r2rClient) {}
    method create (line 24) | async create(options: {
    method update (line 43) | async update(options: {
    method retrieve (line 58) | async retrieve(options: { id: string }): Promise<WrappedChunkResponse> {
    method delete (line 67) | async delete(options: { id: string }): Promise<WrappedBooleanResponse> {
    method list (line 79) | async list(options?: {

FILE: js/sdk/src/v3/clients/collections.ts
  class CollectionsClient (line 17) | class CollectionsClient {
    method constructor (line 18) | constructor(private client: r2rClient) {}
    method create (line 26) | async create(options: {
    method list (line 43) | async list(options?: {
    method retrieve (line 72) | async retrieve(options: { id: string }): Promise<WrappedCollectionResp...
    method update (line 84) | async update(options: {
    method delete (line 108) | async delete(options: { id: string }): Promise<WrappedBooleanResponse> {
    method listDocuments (line 119) | async listDocuments(options: {
    method addDocument (line 144) | async addDocument(options: {
    method removeDocument (line 160) | async removeDocument(options: {
    method listUsers (line 177) | async listUsers(options: {
    method addUser (line 198) | async addUser(options: {
    method removeUser (line 214) | async removeUser(options: {
    method extract (line 245) | async extract(options: {
    method export (line 276) | async export(
    method exportToFile (line 320) | async exportToFile(options: {
    method retrieveByName (line 337) | async retrieveByName(options: {

FILE: js/sdk/src/v3/clients/conversations.ts
  class ConversationsClient (line 15) | class ConversationsClient {
    method constructor (line 16) | constructor(private client: r2rClient) {}
    method create (line 23) | async create(options?: {
    method list (line 42) | async list(options?: {
    method retrieve (line 66) | async retrieve(options: {
    method update (line 78) | async update(options: {
    method delete (line 96) | async delete(options: { id: string }): Promise<WrappedBooleanResponse> {
    method addMessage (line 109) | async addMessage(options: {
    method updateMessage (line 140) | async updateMessage(options: {
    method export (line 170) | async export(
    method exportToFile (line 214) | async exportToFile(options: {
    method exportMessages (line 236) | async exportMessages(
    method exportMessagesToFile (line 280) | async exportMessagesToFile(options: {

FILE: js/sdk/src/v3/clients/documents.ts
  type FileInput (line 28) | type FileInput = string | File | { path: string; name: string };
  type SearchMode (line 31) | type SearchMode = "basic" | "advanced" | "custom";
  type SearchSettings (line 32) | interface SearchSettings {
  class DocumentsClient (line 45) | class DocumentsClient {
    method constructor (line 46) | constructor(private client: r2rClient) {}
    method create (line 65) | async create(options: {
    method appendMetadata (line 253) | async appendMetadata(options: {
    method replaceMetadata (line 275) | async replaceMetadata(options: {
    method retrieve (line 292) | async retrieve(options: { id: string }): Promise<WrappedDocumentRespon...
    method list (line 308) | async list(options?: {
    method download (line 342) | async download(options: { id: string }): Promise<Blob> {
    method export (line 395) | async export(
    method exportEntities (line 444) | async exportEntities(options: {
    method exportEntitiesToFile (line 497) | async exportEntitiesToFile(options: {
    method exportRelationships (line 540) | async exportRelationships(options: {
    method exportRelationshipsToFile (line 592) | async exportRelationshipsToFile(options: {
    method downloadZip (line 632) | async downloadZip(options: {
    method downloadZipToFile (line 684) | async downloadZipToFile(options: {
    method exportToFile (line 722) | async exportToFile(options: {
    method delete (line 754) | async delete(options: { id: string }): Promise<WrappedBooleanResponse> {
    method listChunks (line 769) | async listChunks(options: {
    method listCollections (line 797) | async listCollections(options: {
    method deleteByFilter (line 824) | async deleteByFilter(options: {
    method extract (line 844) | async extract(options: {
    method listEntities (line 877) | async listEntities(options: {
    method listRelationships (line 907) | async listRelationships(options: {
    method deduplicate (line 947) | async deduplicate(options: {
    method search (line 983) | async search(options: {
    method createSample (line 1009) | async createSample(options?: {

FILE: js/sdk/src/v3/clients/graphs.ts
  class GraphsClient (line 20) | class GraphsClient {
    method constructor (line 21) | constructor(private client: r2rClient) {}
    method list (line 30) | async list(options?: {
    method retrieve (line 54) | async retrieve(options: {
    method reset (line 70) | async reset(options: {
    method update (line 86) | async update(options: {
    method createEntity (line 107) | async createEntity(options: {
    method listEntities (line 137) | async listEntities(options: {
    method getEntity (line 162) | async getEntity(options: {
    method updateEntity (line 179) | async updateEntity(options: {
    method removeEntity (line 209) | async removeEntity(options: {
    method createRelationship (line 224) | async createRelationship(options: {
    method listRelationships (line 262) | async listRelationships(options: {
    method getRelationship (line 287) | async getRelationship(options: {
    method updateRelationship (line 304) | async updateRelationship(options: {
    method removeRelationship (line 342) | async removeRelationship(options: {
    method exportEntities (line 362) | async exportEntities(options: {
    method exportEntitiesToFile (line 405) | async exportEntitiesToFile(options: {
    method exportRelationships (line 428) | async exportRelationships(options: {
    method exportRelationshipsToFile (line 471) | async exportRelationshipsToFile(options: {
    method exportCommunities (line 494) | async exportCommunities(options: {
    method exportCommunitiesToFile (line 537) | async exportCommunitiesToFile(options: {
    method createCommunity (line 574) | async createCommunity(options: {
    method listCommunities (line 610) | async listCommunities(options: {
    method getCommunity (line 635) | async getCommunity(options: {
    method updateCommunity (line 652) | async updateCommunity(options: {
    method deleteCommunity (line 687) | async deleteCommunity(options: {
    method pull (line 719) | async pull(options: {
    method removeDocument (line 740) | async removeDocument(options: {
    method buildCommunities (line 772) | async buildCommunities(options: {

FILE: js/sdk/src/v3/clients/indices.ts
  class IndiciesClient (line 8) | class IndiciesClient {
    method constructor (line 9) | constructor(private client: r2rClient) {}
    method create (line 17) | async create(options: {
    method list (line 40) | async list(options?: {
    method retrieve (line 65) | async retrieve(options: {
    method delete (line 81) | async delete(options: {

FILE: js/sdk/src/v3/clients/prompts.ts
  class PromptsClient (line 9) | class PromptsClient {
    method constructor (line 10) | constructor(private client: r2rClient) {}
    method create (line 22) | async create(options: {
    method list (line 39) | async list(): Promise<WrappedPromptsResponse> {
    method retrieve (line 52) | async retrieve(options: {
    method update (line 76) | async update(options: {
    method delete (line 103) | async delete(options: { name: string }): Promise<WrappedBooleanRespons...

FILE: js/sdk/src/v3/clients/retrieval.ts
  class RetrievalClient (line 12) | class RetrievalClient {
    method constructor (line 13) | constructor(private client: r2rClient) {}
    method search (line 29) | async search(options: {
    method rag (line 64) | async rag(options: {
    method streamRag (line 104) | private async streamRag(
    method agent (line 159) | async agent(options: {
    method streamAgent (line 251) | private async streamAgent(
    method completion (line 279) | async completion(options: {
    method streamCompletion (line 299) | private async streamCompletion(
    method embedding (line 323) | async embedding(options: {

FILE: js/sdk/src/v3/clients/system.ts
  class SystemClient (line 8) | class SystemClient {
    method constructor (line 9) | constructor(private client: r2rClient) {}
    method health (line 14) | async health(): Promise<WrappedGenericMessageResponse> {
    method settings (line 22) | async settings(): Promise<WrappedSettingsResponse> {
    method status (line 31) | async status(): Promise<WrappedServerStatsResponse> {

FILE: js/sdk/src/v3/clients/users.ts
  class UsersClient (line 21) | class UsersClient {
    method constructor (line 22) | constructor(private client: r2rClient) {}
    method create (line 34) | async create(options: {
    method sendVerificationEmail (line 65) | async sendVerificationEmail(options: {
    method delete (line 82) | async delete(options: {
    method verifyEmail (line 98) | async verifyEmail(options: {
    method login (line 113) | async login(options: {
    method loginWithToken (line 142) | async loginWithToken(options: { accessToken: string }): Promise<any> {
    method logout (line 166) | async logout(): Promise<WrappedGenericMessageResponse> {
    method refreshAccessToken (line 176) | async refreshAccessToken(): Promise<WrappedTokenResponse> {
    method changePassword (line 211) | async changePassword(options: {
    method requestPasswordReset (line 220) | async requestPasswordReset(
    method resetPassword (line 237) | async resetPassword(options: {
    method list (line 253) | async list(options?: {
    method retrieve (line 277) | async retrieve(options: { id: string }): Promise<WrappedUserResponse> {
    method me (line 285) | async me(): Promise<WrappedUserResponse> {
    method update (line 299) | async update(options: {
    method listCollections (line 333) | async listCollections(options: {
    method addToCollection (line 354) | async addToCollection(options: {
    method removeFromCollection (line 370) | async removeFromCollection(options: {
    method export (line 390) | async export(
    method exportToFile (line 430) | async exportToFile(options: {
    method createApiKey (line 448) | async createApiKey(options: {
    method listApiKeys (line 469) | async listApiKeys(options: { id: string }): Promise<WrappedAPIKeysResp...
    method deleteApiKey (line 480) | async deleteApiKey(options: {
    method getLimits (line 490) | async getLimits(options: { id: string }): Promise<WrappedLimitsRespons...
    method oauthGoogleAuthorize (line 494) | async oauthGoogleAuthorize(): Promise<{ redirect_url: string }> {
    method oauthGithubAuthorize (line 498) | async oauthGithubAuthorize(): Promise<{ redirect_url: string }> {
    method oauthGoogleCallback (line 502) | async oauthGoogleCallback(options: {
    method oauthGithubCallback (line 514) | async oauthGithubCallback(options: {

FILE: py/core/agent/base.py
  class CombinedMeta (line 22) | class CombinedMeta(AsyncSyncMeta, ABCMeta):
  function sync_wrapper (line 26) | def sync_wrapper(async_gen):
  class R2RAgent (line 42) | class R2RAgent(Agent, metaclass=CombinedMeta):
    method __init__ (line 43) | def __init__(self, *args, **kwargs):
    method _generate_llm_summary (line 48) | async def _generate_llm_summary(self, iterations_count: int) -> str:
    method _reset (line 96) | def _reset(self):
    method arun (line 101) | async def arun(
    method process_llm_response (line 153) | async def process_llm_response(
  class R2RStreamingAgent (line 351) | class R2RStreamingAgent(R2RAgent):
    method __init__ (line 363) | def __init__(self, *args, **kwargs):
    method arun (line 369) | async def arun(
    method _handle_thinking (line 693) | async def _handle_thinking(
    method _add_tool_calls_message (line 733) | async def _add_tool_calls_message(self, calls_list, partial_text_buffer):
    method _create_tool_call_data (line 752) | def _create_tool_call_data(self, call_info):
    method _create_citation_payload (line 760) | def _create_citation_payload(self, short_id, payload):
    method _create_final_answer_payload (line 777) | def _create_final_answer_payload(self, answer_text, citations):
  class R2RXMLStreamingAgent (line 788) | class R2RXMLStreamingAgent(R2RStreamingAgent):
    method arun (line 817) | async def arun(
    method _parse_single_tool_call (line 1270) | def _parse_single_tool_call(
  class R2RXMLToolsAgent (line 1310) | class R2RXMLToolsAgent(R2RAgent):
    method process_llm_response (line 1341) | async def process_llm_response(self, response, *args, **kwargs):
    method _parse_single_tool_call (line 1447) | def _parse_single_tool_call(

FILE: py/core/agent/rag.py
  class RAGAgentMixin (line 39) | class RAGAgentMixin:
    method __init__ (line 48) | def __init__(
    method _register_tools (line 72) | def _register_tools(self):
    method format_search_results_for_llm (line 102) | def format_search_results_for_llm(
  class R2RRAGAgent (line 115) | class R2RRAGAgent(RAGAgentMixin, R2RAgent):
    method __init__ (line 120) | def __init__(
  class R2RXMLToolsRAGAgent (line 165) | class R2RXMLToolsRAGAgent(RAGAgentMixin, R2RXMLToolsAgent):
    method __init__ (line 170) | def __init__(
  class R2RStreamingRAGAgent (line 215) | class R2RStreamingRAGAgent(RAGAgentMixin, R2RStreamingAgent):
    method __init__ (line 221) | def __init__(
  class R2RXMLToolsStreamingRAGAgent (line 269) | class R2RXMLToolsStreamingRAGAgent(RAGAgentMixin, R2RXMLStreamingAgent):
    method __init__ (line 281) | def __init__(

FILE: py/core/agent/research.py
  class ResearchAgentMixin (line 35) | class ResearchAgentMixin(RAGAgentMixin):
    method __init__ (line 46) | def __init__(
    method _register_research_tools (line 74) | def _register_research_tools(self):
    method rag_tool (line 97) | def rag_tool(self) -> Tool:
    method reasoning_tool (line 121) | def reasoning_tool(self) -> Tool:
    method critique_tool (line 150) | def critique_tool(self) -> Tool:
    method python_execution_tool (line 180) | def python_execution_tool(self) -> Tool:
    method _rag (line 208) | async def _rag(
    method _reason (line 281) | async def _reason(
    method _critique (line 308) | async def _critique(
    method _execute_python_with_process_timeout (line 358) | async def _execute_python_with_process_timeout(
    method _format_python_results (line 426) | def _format_python_results(self, results: dict[str, Any]) -> str:
    method _format_search_results (line 473) | def _format_search_results(self, results) -> str:
  class R2RResearchAgent (line 478) | class R2RResearchAgent(ResearchAgentMixin, R2RRAGAgent):
    method __init__ (line 486) | def __init__(
  class R2RStreamingResearchAgent (line 537) | class R2RStreamingResearchAgent(ResearchAgentMixin, R2RStreamingRAGAgent):
    method __init__ (line 545) | def __init__(
  class R2RXMLToolsResearchAgent (line 597) | class R2RXMLToolsResearchAgent(ResearchAgentMixin, R2RXMLToolsRAGAgent):
    method __init__ (line 605) | def __init__(
  class R2RXMLToolsStreamingResearchAgent (line 652) | class R2RXMLToolsStreamingResearchAgent(
    method __init__ (line 662) | def __init__(

FILE: py/core/base/agent/agent.py
  class Conversation (line 23) | class Conversation:
    method __init__ (line 24) | def __init__(self):
    method add_message (line 28) | async def add_message(self, message):
    method get_messages (line 32) | async def get_messages(self) -> list[dict[str, Any]]:
  class AgentConfig (line 41) | class AgentConfig(BaseModel):
    method create (line 49) | def create(cls: Type["AgentConfig"], **kwargs: Any) -> "AgentConfig":
  class Agent (line 59) | class Agent(ABC):
    method __init__ (line 60) | def __init__(
    method _register_tools (line 78) | def _register_tools(self):
    method _setup (line 81) | async def _setup(
    method tools (line 101) | def tools(self) -> list[Tool]:
    method tools (line 105) | def tools(self, tools: list[Tool]):
    method arun (line 109) | async def arun(
    method process_llm_response (line 119) | async def process_llm_response(
    method execute_tool (line 127) | async def execute_tool(self, tool_name: str, *args, **kwargs) -> str:
    method get_generation_config (line 133) | def get_generation_config(
    method handle_function_or_tool_call (line 174) | async def handle_function_or_tool_call(
  class RAGAgentConfig (line 258) | class RAGAgentConfig(AgentConfig):
    method create (line 288) | def create(cls: Type["AgentConfig"], **kwargs: Any) -> "AgentConfig":

FILE: py/core/base/agent/tools/built_in/get_file_content.py
  class GetFileContentTool (line 10) | class GetFileContentTool(Tool):
    method __init__ (line 18) | def __init__(self):
    method execute (line 42) | async def execute(

FILE: py/core/base/agent/tools/built_in/search_file_descriptions.py
  class SearchFileDescriptionsTool (line 8) | class SearchFileDescriptionsTool(Tool):
    method __init__ (line 13) | def __init__(self):
    method execute (line 35) | async def execute(self, query: str, *args, **kwargs):

FILE: py/core/base/agent/tools/built_in/search_file_knowledge.py
  class SearchFileKnowledgeTool (line 8) | class SearchFileKnowledgeTool(Tool):
    method __init__ (line 13) | def __init__(self):
    method execute (line 34) | async def execute(self, query: str, *args, **kwargs):

FILE: py/core/base/agent/tools/built_in/tavily_extract.py
  class TavilyExtractTool (line 11) | class TavilyExtractTool(Tool):
    method __init__ (line 16) | def __init__(self):
    method execute (line 41) | async def execute(self, url: str, *args, **kwargs):

FILE: py/core/base/agent/tools/built_in/tavily_search.py
  class TavilySearchTool (line 11) | class TavilySearchTool(Tool):
    method __init__ (line 17) | def __init__(self):
    method execute (line 45) | async def execute(self, query: str, *args, **kwargs):

FILE: py/core/base/agent/tools/built_in/web_scrape.py
  class WebScrapeTool (line 11) | class WebScrapeTool(Tool):
    method __init__ (line 17) | def __init__(self):
    method execute (line 42) | async def execute(self, url: str, *args, **kwargs):

FILE: py/core/base/agent/tools/built_in/web_search.py
  class WebSearchTool (line 4) | class WebSearchTool(Tool):
    method __init__ (line 10) | def __init__(self):
    method execute (line 31) | async def execute(self, query: str, *args, **kwargs):

FILE: py/core/base/agent/tools/registry.py
  class ToolRegistry (line 14) | class ToolRegistry:
    method __init__ (line 20) | def __init__(
    method _discover_built_in_tools (line 47) | def _discover_built_in_tools(self):
    method _discover_user_tools (line 98) | def _discover_user_tools(self):
    method get_tool_class (line 145) | def get_tool_class(self, tool_name: str):
    method list_available_tools (line 152) | def list_available_tools(
    method create_tool_instance (line 169) | def create_tool_instance(

FILE: py/core/base/parsers/base_parser.py
  class AsyncParser (line 9) | class AsyncParser(ABC, Generic[T]):
    method ingest (line 11) | async def ingest(self, data: T, **kwargs) -> AsyncGenerator[str, None]:

FILE: py/core/base/providers/auth.py
  class AuthConfig (line 27) | class AuthConfig(ProviderConfig):
    method supported_providers (line 37) | def supported_providers(self) -> list[str]:
    method validate_config (line 40) | def validate_config(self) -> None:
  class AuthProvider (line 44) | class AuthProvider(Provider, ABC):
    method __init__ (line 50) | def __init__(
    method _get_default_admin_user (line 71) | async def _get_default_admin_user(self) -> User:
    method create_access_token (line 77) | def create_access_token(self, data: dict) -> str:
    method create_refresh_token (line 81) | def create_refresh_token(self, data: dict) -> str:
    method decode_token (line 85) | async def decode_token(self, token: str) -> TokenData:
    method user (line 89) | async def user(self, token: str) -> User:
    method get_current_active_user (line 93) | def get_current_active_user(self, current_user: User) -> User:
    method register (line 97) | async def register(self, email: str, password: str) -> User:
    method send_verification_email (line 101) | async def send_verification_email(
    method verify_email (line 107) | async def verify_email(
    method login (line 113) | async def login(self, email: str, password: str) -> dict[str, Token]:
    method refresh_access_token (line 117) | async def refresh_access_token(
    method auth_wrapper (line 122) | def auth_wrapper(
    method change_password (line 210) | async def change_password(
    method request_password_reset (line 216) | async def request_password_reset(self, email: str) -> dict[str, str]:
    method confirm_password_reset (line 220) | async def confirm_password_reset(
    method logout (line 226) | async def logout(self, token: str) -> dict[str, str]:
    method send_reset_email (line 230) | async def send_reset_email(self, email: str) -> dict[str, str]:

FILE: py/core/base/providers/base.py
  class InnerConfig (line 7) | class InnerConfig(BaseModel, ABC):
    class Config (line 12) | class Config:
    method create (line 18) | def create(cls: Type["InnerConfig"], **kwargs: Any) -> "InnerConfig":
  class AppConfig (line 32) | class AppConfig(InnerConfig):
  class ProviderConfig (line 84) | class ProviderConfig(BaseModel, ABC):
    class Config (line 91) | class Config:
    method validate_config (line 97) | def validate_config(self) -> None:
    method create (line 101) | def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderCon...
    method supported_providers (line 116) | def supported_providers(self) -> list[str]:
    method from_dict (line 121) | def from_dict(
  class Provider (line 128) | class Provider(ABC):
    method __init__ (line 132) | def __init__(self, config: ProviderConfig, *args, **kwargs):

FILE: py/core/base/providers/crypto.py
  class CryptoConfig (line 8) | class CryptoConfig(ProviderConfig):
    method supported_providers (line 12) | def supported_providers(self) -> list[str]:
    method validate_config (line 15) | def validate_config(self) -> None:
  class CryptoProvider (line 20) | class CryptoProvider(Provider, ABC):
    method __init__ (line 21) | def __init__(self, config: CryptoConfig):
    method get_password_hash (line 29) | def get_password_hash(self, password: str) -> str:
    method verify_password (line 35) | def verify_password(
    method generate_verification_code (line 43) | def generate_verification_code(self, length: int = 32) -> str:
    method generate_signing_keypair (line 48) | def generate_signing_keypair(self) -> Tuple[str, str, str]:
    method sign_request (line 60) | def sign_request(self, private_key: str, data: str) -> str:
    method verify_request_signature (line 66) | def verify_request_signature(
    method generate_api_key (line 74) | def generate_api_key(self) -> Tuple[str, str]:
    method hash_api_key (line 85) | def hash_api_key(self, raw_api_key: str) -> str:
    method verify_api_key (line 93) | def verify_api_key(self, raw_api_key: str, hashed_key: str) -> bool:
    method generate_secure_token (line 98) | def generate_secure_token(self, data: dict, expiry: datetime) -> str:
    method verify_secure_token (line 111) | def verify_secure_token(self, token: str) -> Optional[dict]:

FILE: py/core/base/providers/database.py
  class DatabaseConnectionManager (line 22) | class DatabaseConnectionManager(ABC):
    method execute_query (line 24) | def execute_query(
    method execute_many (line 33) | async def execute_many(self, query, params=None, batch_size=1000):
    method fetch_query (line 37) | def fetch_query(
    method fetchrow_query (line 45) | def fetchrow_query(
    method initialize (line 53) | async def initialize(self, pool: Any):
  class Handler (line 57) | class Handler(ABC):
    method __init__ (line 58) | def __init__(
    method _get_table_name (line 66) | def _get_table_name(self, base_name: str) -> str:
    method create_tables (line 71) | def create_tables(self):
  class PostgresConfigurationSettings (line 75) | class PostgresConfigurationSettings(BaseModel):
  class LimitSettings (line 104) | class LimitSettings(BaseModel):
    method merge_with_defaults (line 109) | def merge_with_defaults(
  class MaintenanceSettings (line 119) | class MaintenanceSettings(BaseModel):
  class DatabaseConfig (line 125) | class DatabaseConfig(ProviderConfig):
    method validate_config (line 163) | def validate_config(self) -> None:
    method supported_providers (line 168) | def supported_providers(self) -> list[str]:
    method from_dict (line 172) | def from_dict(cls, data: dict[str, Any]) -> "DatabaseConfig":
  class DatabaseProvider (line 193) | class DatabaseProvider(Provider):
    method __init__ (line 198) | def __init__(self, config: DatabaseConfig):
    method __aenter__ (line 203) | async def __aenter__(self):
    method __aexit__ (line 207) | async def __aexit__(self, exc_type, exc, tb):

FILE: py/core/base/providers/email.py
  class EmailConfig (line 9) | class EmailConfig(ProviderConfig):
    method supported_providers (line 25) | def supported_providers(self) -> list[str]:
    method validate_config (line 33) | def validate_config(self) -> None:
  class EmailProvider (line 56) | class EmailProvider(Provider, ABC):
    method __init__ (line 57) | def __init__(self, config: EmailConfig):
    method send_email (line 66) | async def send_email(
    method send_verification_email (line 78) | async def send_verification_email(
    method send_password_reset_email (line 84) | async def send_password_reset_email(
    method send_password_changed_email (line 90) | async def send_password_changed_email(

FILE: py/core/base/providers/embedding.py
  class EmbeddingConfig (line 21) | class EmbeddingConfig(ProviderConfig):
    method validate_config (line 38) | def validate_config(self) -> None:
    method supported_providers (line 43) | def supported_providers(self) -> list[str]:
  class EmbeddingProvider (line 47) | class EmbeddingProvider(Provider):
    class Step (line 48) | class Step(Enum):
    method __init__ (line 52) | def __init__(self, config: EmbeddingConfig):
    method _execute_with_backoff_async (line 64) | async def _execute_with_backoff_async(self, task: dict[str, Any]):
    method _execute_with_backoff_sync (line 83) | def _execute_with_backoff_sync(self, task: dict[str, Any]):
    method _execute_task (line 102) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 106) | def _execute_task_sync(self, task: dict[str, Any]):
    method async_get_embedding (line 109) | async def async_get_embedding(
    method get_embedding (line 120) | def get_embedding(
    method async_get_embeddings (line 131) | async def async_get_embeddings(
    method get_embeddings (line 142) | def get_embeddings(
    method rerank (line 154) | def rerank(
    method arerank (line 164) | async def arerank(

FILE: py/core/base/providers/file.py
  class FileConfig (line 14) | class FileConfig(ProviderConfig):
    method supported_providers (line 29) | def supported_providers(self) -> list[str]:
    method validate_config (line 38) | def validate_config(self) -> None:
  class FileProvider (line 50) | class FileProvider(Provider, ABC):
    method __init__ (line 55) | def __init__(self, config: FileConfig):
    method initialize (line 64) | async def initialize(self) -> None:
    method store_file (line 69) | async def store_file(
    method retrieve_file (line 80) | async def retrieve_file(
    method retrieve_files_as_zip (line 87) | async def retrieve_files_as_zip(
    method delete_file (line 97) | async def delete_file(self, document_id: UUID) -> bool:
    method get_files_overview (line 102) | async def get_files_overview(

FILE: py/core/base/providers/ingestion.py
  class ChunkingStrategy (line 19) | class ChunkingStrategy(str, Enum):
  class IngestionConfig (line 26) | class IngestionConfig(ProviderConfig):
    method set_default (line 142) | def set_default(cls, **kwargs):
    method supported_providers (line 152) | def supported_providers(self) -> list[str]:
    method validate_config (line 155) | def validate_config(self) -> None:
    method get_default (line 162) | def get_default(cls, mode: str, app) -> "IngestionConfig":
  class IngestionProvider (line 174) | class IngestionProvider(Provider, ABC):
    method __init__ (line 179) | def __init__(

FILE: py/core/base/providers/llm.py
  class CompletionConfig (line 22) | class CompletionConfig(ProviderConfig):
    method validate_config (line 31) | def validate_config(self) -> None:
    method supported_providers (line 38) | def supported_providers(self) -> list[str]:
  class CompletionProvider (line 42) | class CompletionProvider(Provider):
    method __init__ (line 43) | def __init__(self, config: CompletionConfig) -> None:
    method _execute_with_backoff_async (line 56) | async def _execute_with_backoff_async(
    method _execute_with_backoff_async_stream (line 91) | async def _execute_with_backoff_async_stream(
    method _execute_with_backoff_sync (line 114) | def _execute_with_backoff_sync(
    method _execute_with_backoff_sync_stream (line 142) | def _execute_with_backoff_sync_stream(
    method _execute_task (line 162) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 166) | def _execute_task_sync(self, task: dict[str, Any]):
    method aget_completion (line 169) | async def aget_completion(
    method aget_completion_stream (line 186) | async def aget_completion_stream(
    method get_completion_stream (line 220) | def get_completion_stream(

FILE: py/core/base/providers/ocr.py
  class OCRConfig (line 16) | class OCRConfig(ProviderConfig):
    method validate_config (line 24) | def validate_config(self) -> None:
    method supported_providers (line 31) | def supported_providers(self) -> list[str]:
  class OCRProvider (line 35) | class OCRProvider(Provider):
    method __init__ (line 36) | def __init__(self, config: OCRConfig) -> None:
    method _execute_with_backoff_async (line 49) | async def _execute_with_backoff_async(self, task: dict[str, Any]):
    method _execute_with_backoff_sync (line 68) | def _execute_with_backoff_sync(self, task: dict[str, Any]):
    method _execute_task (line 85) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 89) | def _execute_task_sync(self, task: dict[str, Any]):
    method upload_file (line 93) | async def upload_file(
    method process_file (line 102) | async def process_file(
    method process_url (line 108) | async def process_url(
    method process_pdf (line 117) | async def process_pdf(

FILE: py/core/base/providers/orchestration.py
  class Workflow (line 8) | class Workflow(Enum):
  class OrchestrationConfig (line 13) | class OrchestrationConfig(ProviderConfig):
    method validate_config (line 20) | def validate_config(self) -> None:
    method supported_providers (line 25) | def supported_providers(self) -> list[str]:
  class OrchestrationProvider (line 29) | class OrchestrationProvider(Provider):
    method __init__ (line 30) | def __init__(self, config: OrchestrationConfig):
    method start_worker (line 36) | async def start_worker(self):
    method get_worker (line 40) | def get_worker(self, name: str, max_runs: int) -> Any:
    method step (line 44) | def step(self, *args, **kwargs) -> Any:
    method workflow (line 48) | def workflow(self, *args, **kwargs) -> Any:
    method failure (line 52) | def failure(self, *args, **kwargs) -> Any:
    method register_workflows (line 56) | def register_workflows(
    method run_workflow (line 62) | async def run_workflow(

FILE: py/core/base/providers/scheduler.py
  class SchedulerConfig (line 6) | class SchedulerConfig(ProviderConfig):
    method validate_config (line 11) | def validate_config(self):
    method supported_providers (line 18) | def supported_providers(self) -> list[str]:
  class SchedulerProvider (line 22) | class SchedulerProvider(Provider):
    method __init__ (line 25) | def __init__(self, config: SchedulerConfig):
    method add_job (line 30) | async def add_job(self, func, trigger, **kwargs):
    method start (line 34) | async def start(self):
    method shutdown (line 38) | async def shutdown(self):

FILE: py/core/examples/supported_file_types/js.js
  function main (line 8) | async function main() {

FILE: py/core/examples/supported_file_types/py.py
  class HTMLParser (line 14) | class HTMLParser(AsyncParser[str | bytes]):
    method __init__ (line 17) | def __init__(
    method ingest (line 27) | async def ingest(

FILE: py/core/examples/supported_file_types/ts.ts
  function handleRequestError (line 19) | function handleRequestError(response: AxiosResponse): void {
  method constructor (line 49) | constructor(
  method _makeRequest (line 72) | protected async _makeRequest<T = any>(
  method handleStreamingRequest (line 184) | private async handleStreamingRequest<T>(
  method _ensureAuthenticated (line 237) | protected _ensureAuthenticated(): void {
  method setTokens (line 243) | setTokens(accessToken: string, refreshToken: string): void {

FILE: py/core/main/abstractions.py
  class R2RProviders (line 44) | class R2RProviders(BaseModel):
    class Config (line 80) | class Config:
  class R2RServices (line 85) | class R2RServices:

FILE: py/core/main/api/v3/base_router.py
  class BaseRouterV3 (line 17) | class BaseRouterV3:
    method __init__ (line 18) | def __init__(
    method get_router (line 38) | def get_router(self):
    method base_endpoint (line 41) | def base_endpoint(self, func: Callable):
    method build_router (line 81) | def build_router(cls, engine):
    method _register_workflows (line 86) | def _register_workflows(self):
    method _load_openapi_extras (line 89) | def _load_openapi_extras(self):
    method _setup_routes (line 93) | def _setup_routes(self):
    method set_rate_limiting (line 97) | def set_rate_limiting(self):

FILE: py/core/main/api/v3/chunks_router.py
  class ChunksRouter (line 34) | class ChunksRouter(BaseRouterV3):
    method __init__ (line 35) | def __init__(
    method _setup_routes (line 41) | def _setup_routes(self):

FILE: py/core/main/api/v3/collections_router.py
  class CollectionAction (line 34) | class CollectionAction(str, Enum):
  function authorize_collection_action (line 43) | async def authorize_collection_action(
  class CollectionsRouter (line 88) | class CollectionsRouter(BaseRouterV3):
    method __init__ (line 89) | def __init__(
    method _setup_routes (line 95) | def _setup_routes(self):

FILE: py/core/main/api/v3/conversations_router.py
  class ConversationsRouter (line 27) | class ConversationsRouter(BaseRouterV3):
    method __init__ (line 28) | def __init__(
    method _setup_routes (line 34) | def _setup_routes(self):

FILE: py/core/main/api/v3/documents_router.py
  function merge_search_settings (line 52) | def merge_search_settings(
  function merge_ingestion_config (line 68) | def merge_ingestion_config(
  class DocumentsRouter (line 80) | class DocumentsRouter(BaseRouterV3):
    method __init__ (line 81) | def __init__(
    method _prepare_search_settings (line 91) | def _prepare_search_settings(
    method _register_workflows (line 121) | def _register_workflows(self):
    method _prepare_ingestion_config (line 159) | def _prepare_ingestion_config(
    method _setup_routes (line 181) | def _setup_routes(self):
    method _process_file (line 2349) | async def _process_file(file):

FILE: py/core/main/api/v3/graph_router.py
  class GraphRouter (line 38) | class GraphRouter(BaseRouterV3):
    method __init__ (line 39) | def __init__(
    method _register_workflows (line 49) | def _register_workflows(self):
    method _get_collection_id (line 78) | async def _get_collection_id(
    method _setup_routes (line 87) | def _setup_routes(self):

FILE: py/core/main/api/v3/indices_router.py
  class IndicesRouter (line 24) | class IndicesRouter(BaseRouterV3):
    method __init__ (line 25) | def __init__(
    method _setup_routes (line 31) | def _setup_routes(self):

FILE: py/core/main/api/v3/prompts_router.py
  class PromptsRouter (line 22) | class PromptsRouter(BaseRouterV3):
    method __init__ (line 23) | def __init__(
    method _setup_routes (line 29) | def _setup_routes(self):

FILE: py/core/main/api/v3/retrieval_router.py
  function merge_search_settings (line 33) | def merge_search_settings(
  class RetrievalRouter (line 49) | class RetrievalRouter(BaseRouterV3):
    method __init__ (line 50) | def __init__(
    method _register_workflows (line 56) | def _register_workflows(self):
    method _prepare_search_settings (line 59) | def _prepare_search_settings(
    method _setup_routes (line 86) | def _setup_routes(self):

FILE: py/core/main/api/v3/system_router.py
  class SystemRouter (line 21) | class SystemRouter(BaseRouterV3):
    method __init__ (line 22) | def __init__(
    method _setup_routes (line 32) | def _setup_routes(self):

FILE: py/core/main/api/v3/users_router.py
  class UsersRouter (line 40) | class UsersRouter(BaseRouterV3):
    method __init__ (line 41) | def __init__(
    method _setup_routes (line 54) | def _setup_routes(self):

FILE: py/core/main/app.py
  class R2RApp (line 28) | class R2RApp:
    method __init__ (line 29) | def __init__(
    method _setup_routes (line 80) | def _setup_routes(self):
    method _apply_middleware (line 100) | def _apply_middleware(self):
    method serve (line 117) | async def serve(self, host: str = "0.0.0.0", port: int = 7272):

FILE: py/core/main/app_entry.py
  function lifespan (line 25) | async def lifespan(app: FastAPI):
  function create_r2r_app (line 51) | async def create_r2r_app(
  function r2r_exception_handler (line 116) | async def r2r_exception_handler(request: Request, exc: R2RException):

FILE: py/core/main/assembly/builder.py
  class R2RBuilder (line 32) | class R2RBuilder:
    method __init__ (line 42) | def __init__(self, config: R2RConfig):
    method build (line 45) | async def build(self, *args, **kwargs) -> R2RApp:
    method _create_providers (line 142) | async def _create_providers(
    method _create_services (line 148) | def _create_services(self, service_params: dict[str, Any]) -> R2RServi...

FILE: py/core/main/assembly/factory.py
  class R2RProviderFactory (line 58) | class R2RProviderFactory:
    method __init__ (line 59) | def __init__(self, config: R2RConfig):
    method create_auth_provider (line 63) | async def create_auth_provider(
    method create_crypto_provider (line 105) | def create_crypto_provider(
    method create_ocr_provider (line 122) | def create_ocr_provider(
    method create_ingestion_provider (line 134) | def create_ingestion_provider(
    method create_orchestration_provider (line 185) | def create_orchestration_provider(
    method create_database_provider (line 201) | async def create_database_provider(
    method create_file_provider (line 232) | def create_file_provider(
    method create_embedding_provider (line 252) | def create_embedding_provider(
    method create_llm_provider (line 288) | def create_llm_provider(
    method create_email_provider (line 314) | async def create_email_provider(
    method create_scheduler_provider (line 342) | async def create_scheduler_provider(
    method create_providers (line 353) | async def create_providers(

FILE: py/core/main/assembly/utils.py
  function install_user_tool_dependencies (line 9) | def install_user_tool_dependencies(user_tools_path: str):

FILE: py/core/main/config.py
  class R2RConfig (line 29) | class R2RConfig:
    method __init__ (line 86) | def __init__(self, config_data: dict[str, Any]):
    method _validate_config_section (line 139) | def _validate_config_section(
    method from_toml (line 152) | def from_toml(cls, config_path: Optional[str] = None) -> "R2RConfig":
    method to_toml (line 162) | def to_toml(self):
    method load_default_config (line 173) | def load_default_config(cls) -> dict:
    method _serialize_config (line 178) | def _serialize_config(config_section: Any):
    method _serialize_key (line 200) | def _serialize_key(key: Any) -> str:
    method load (line 204) | def load(

FILE: py/core/main/middleware/project_schema.py
  class ProjectSchemaMiddleware (line 13) | class ProjectSchemaMiddleware(BaseHTTPMiddleware):
    method __init__ (line 14) | def __init__(
    method dispatch (line 21) | async def dispatch(self, request: Request, call_next):

FILE: py/core/main/orchestration/hatchet/graph_workflow.py
  function hatchet_graph_search_results_factory (line 28) | def hatchet_graph_search_results_factory(

FILE: py/core/main/orchestration/hatchet/ingestion_workflow.py
  function hatchet_ingestion_factory (line 34) | def hatchet_ingestion_factory(

FILE: py/core/main/orchestration/simple/graph_workflow.py
  function simple_graph_search_results_factory (line 17) | def simple_graph_search_results_factory(service: GraphService):

FILE: py/core/main/orchestration/simple/ingestion_workflow.py
  function simple_ingestion_factory (line 25) | def simple_ingestion_factory(service: IngestionService):

FILE: py/core/main/services/auth_service.py
  class AuthService (line 17) | class AuthService(Service):
    method __init__ (line 18) | def __init__(
    method register (line 28) | async def register(
    method send_verification_email (line 46) | async def send_verification_email(
    method verify_email (line 51) | async def verify_email(
    method login (line 78) | async def login(self, email: str, password: str) -> dict[str, Token]:
    method user (line 81) | async def user(self, token: str) -> User:
    method refresh_access_token (line 96) | async def refresh_access_token(
    method change_password (line 101) | async def change_password(
    method request_password_reset (line 110) | async def request_password_reset(self, email: str) -> dict[str, str]:
    method confirm_password_reset (line 113) | async def confirm_password_reset(
    method logout (line 120) | async def logout(self, token: str) -> dict[str, str]:
    method update_user (line 123) | async def update_user(
    method delete_user (line 156) | async def delete_user(
    method clean_expired_blacklisted_tokens (line 214) | async def clean_expired_blacklisted_tokens(
    method get_user_verification_code (line 223) | async def get_user_verification_code(
    method get_user_reset_token (line 244) | async def get_user_reset_token(
    method send_reset_email (line 265) | async def send_reset_email(self, email: str) -> dict:
    method create_user_api_key (line 277) | async def create_user_api_key(
    method delete_user_api_key (line 295) | async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
    method list_user_api_keys (line 309) | async def list_user_api_keys(self, user_id: UUID) -> list[dict]:

FILE: py/core/main/services/base.py
  class Service (line 7) | class Service(ABC):
    method __init__ (line 8) | def __init__(

FILE: py/core/main/services/graph_service.py
  function _collect_async_results (line 39) | async def _collect_async_results(result_gen: AsyncGenerator) -> list[Any]:
  class GraphService (line 47) | class GraphService(Service):
    method __init__ (line 48) | def __init__(
    method create_entity (line 58) | async def create_entity(
    method update_entity (line 80) | async def update_entity(
    method delete_entity (line 104) | async def delete_entity(
    method get_entities (line 115) | async def get_entities(
    method create_relationship (line 133) | async def create_relationship(
    method delete_relationship (line 167) | async def delete_relationship(
    method update_relationship (line 180) | async def update_relationship(
    method get_relationships (line 214) | async def get_relationships(
    method create_community (line 231) | async def create_community(
    method update_community (line 254) | async def update_community(
    method delete_community (line 280) | async def delete_community(
    method get_communities (line 290) | async def get_communities(
    method list_graphs (line 307) | async def list_graphs(
    method update_graph (line 321) | async def update_graph(
    method reset_graph (line 333) | async def reset_graph(self, id: UUID) -> bool:
    method get_document_ids_for_create_graph (line 344) | async def get_document_ids_for_create_graph(
    method graph_search_results_entity_description (line 360) | async def graph_search_results_entity_description(
    method _describe_entities_in_document_batch (line 425) | async def _describe_entities_in_document_batch(
    method _process_entity_for_description (line 488) | async def _process_entity_for_description(
    method graph_search_results_clustering (line 589) | async def graph_search_results_clustering(
    method _perform_graph_clustering (line 610) | async def _perform_graph_clustering(
    method graph_search_results_community_summary (line 624) | async def graph_search_results_community_summary(
    method _summarize_communities (line 653) | async def _summarize_communities(
    method _process_community_summary (line 745) | async def _process_community_summary(
    method _community_summary_prompt (line 892) | async def _community_summary_prompt(
    method delete (line 947) | async def delete(
    method graph_search_results_extraction (line 956) | async def graph_search_results_extraction(
    method _extract_graph_search_results_from_chunk_group (line 1069) | async def _extract_graph_search_results_from_chunk_group(
    method _parse_graph_search_results_extraction_xml (line 1149) | async def _parse_graph_search_results_extraction_xml(
    method store_graph_search_results_extractions (line 1249) | async def store_graph_search_results_extractions(
    method deduplicate_document_entities (line 1303) | async def deduplicate_document_entities(

FILE: py/core/main/services/ingestion_service.py
  class IngestionService (line 42) | class IngestionService:
    method __init__ (line 46) | def __init__(
    method ingest_file_ingress (line 54) | async def ingest_file_ingress(
    method create_document_info_from_file (line 140) | def create_document_info_from_file(
    method _create_document_info_from_chunks (line 184) | def _create_document_info_from_chunks(
    method parse_file (line 216) | async def parse_file(
    method augment_document_info (line 293) | async def augment_document_info(
    method embed_document (line 338) | async def embed_document(
    method store_embeddings (line 425) | async def store_embeddings(
    method finalize_ingestion (line 526) | async def finalize_ingestion(
    method update_document_status (line 540) | async def update_document_status(
    method _update_document_status_in_db (line 551) | async def _update_document_status_in_db(
    method ingest_chunks_ingress (line 578) | async def ingest_chunks_ingress(
    method update_chunk_ingress (line 628) | async def update_chunk_ingress(
    method _get_enriched_chunk_text (line 705) | async def _get_enriched_chunk_text(
    method chunk_enrichment (line 794) | async def chunk_enrichment(
    method list_chunks (line 852) | async def list_chunks(
    method get_chunk (line 868) | async def get_chunk(
  class IngestionServiceAdapter (line 877) | class IngestionServiceAdapter:
    method _parse_user_data (line 879) | def _parse_user_data(user_data) -> User:
    method parse_ingest_file_input (line 890) | def parse_ingest_file_input(data: dict) -> dict:
    method parse_ingest_chunks_input (line 905) | def parse_ingest_chunks_input(data: dict) -> dict:
    method parse_update_chunk_input (line 918) | def parse_update_chunk_input(data: dict) -> dict:
    method parse_create_vector_index_input (line 929) | def parse_create_vector_index_input(data: dict) -> dict:
    method parse_list_vector_indices_input (line 941) | def parse_list_vector_indices_input(input_data: dict) -> dict:
    method parse_delete_vector_index_input (line 945) | def parse_delete_vector_index_input(input_data: dict) -> dict:
    method parse_select_vector_index_input (line 953) | def parse_select_vector_index_input(input_data: dict) -> dict:

FILE: py/core/main/services/maintenance_service.py
  class MaintenanceService (line 12) | class MaintenanceService(Service):
    method __init__ (line 13) | def __init__(
    method initialize (line 24) | async def initialize(self):
    method _parse_cron_schedule (line 49) | def _parse_cron_schedule(self, cron_schedule: str) -> dict:
    method vacuum_database (line 81) | async def vacuum_database(self, full: bool = False, analyze: bool = Tr...
    method vacuum_table (line 99) | async def vacuum_table(

FILE: py/core/main/services/management_service.py
  class ManagementService (line 31) | class ManagementService(Service):
    method __init__ (line 32) | def __init__(
    method app_settings (line 42) | async def app_settings(self):
    method users_overview (line 58) | async def users_overview(
    method delete_documents_and_chunks_by_filter (line 70) | async def delete_documents_and_chunks_by_filter(
    method download_file (line 232) | async def download_file(
    method export_files (line 239) | async def export_files(
    method export_collections (line 251) | async def export_collections(
    method export_documents (line 263) | async def export_documents(
    method export_document_entities (line 275) | async def export_document_entities(
    method export_document_relationships (line 290) | async def export_document_relationships(
    method export_conversations (line 305) | async def export_conversations(
    method export_graph_entities (line 317) | async def export_graph_entities(
    method export_graph_relationships (line 332) | async def export_graph_relationships(
    method export_graph_communities (line 347) | async def export_graph_communities(
    method export_messages (line 362) | async def export_messages(
    method export_users (line 374) | async def export_users(
    method documents_overview (line 386) | async def documents_overview(
    method update_document_metadata (line 404) | async def update_document_metadata(
    method list_document_chunks (line 416) | async def list_document_chunks(
    method assign_document_to_collection (line 432) | async def assign_document_to_collection(
    method remove_document_from_collection (line 454) | async def remove_document_from_collection(
    method _process_relationships (line 468) | def _process_relationships(
    method generate_output (line 482) | def generate_output(
    method _count_connected_components (line 528) | def _count_connected_components(self, graph: dict[str, list[str]]) -> ...
    method _get_central_nodes (line 545) | def _get_central_nodes(
    method create_collection (line 555) | async def create_collection(
    method update_collection (line 573) | async def update_collection(
    method delete_collection (line 590) | async def delete_collection(self, collection_id: UUID) -> bool:
    method collections_overview (line 607) | async def collections_overview(
    method add_user_to_collection (line 625) | async def add_user_to_collection(
    method remove_user_from_collection (line 634) | async def remove_user_from_collection(
    method get_users_in_collection (line 641) | async def get_users_in_collection(
    method documents_in_collection (line 648) | async def documents_in_collection(
    method summarize_collection (line 655) | async def summarize_collection(
    method add_prompt (line 694) | async def add_prompt(
    method get_cached_prompt (line 705) | async def get_cached_prompt(
    method get_prompt (line 724) | async def get_prompt(
    method get_all_prompts (line 739) | async def get_all_prompts(self) -> dict[str, Prompt]:
    method update_prompt (line 742) | async def update_prompt(
    method delete_prompt (line 756) | async def delete_prompt(self, name: str) -> dict:
    method get_conversation (line 763) | async def get_conversation(
    method create_conversation (line 773) | async def create_conversation(
    method conversations_overview (line 783) | async def conversations_overview(
    method add_message (line 797) | async def add_message(
    method edit_message (line 811) | async def edit_message(
    method update_conversation (line 825) | async def update_conversation(
    method delete_conversation (line 832) | async def delete_conversation(
    method get_user_max_documents (line 844) | async def get_user_max_documents(self, user_id: UUID) -> int | None:
    method get_user_max_chunks (line 853) | async def get_user_max_chunks(self, user_id: UUID) -> int | None:
    method get_user_max_collections (line 861) | async def get_user_max_collections(self, user_id: UUID) -> int | None:
    method get_max_upload_size_by_type (line 872) | async def get_max_upload_size_by_type(
    method get_all_user_limits (line 920) | async def get_all_user_limits(self, user_id: UUID) -> dict[str, Any]:

FILE: py/core/main/services/retrieval_service.py
  class AgentFactory (line 60) | class AgentFactory:
    method create_agent (line 67) | def create_agent(
  class RetrievalService (line 248) | class RetrievalService(Service):
    method __init__ (line 249) | def __init__(
    method search (line 259) | async def search(
    method _basic_search (line 281) | async def _basic_search(
    method _rag_fusion_search (line 326) | async def _rag_fusion_search(
    method _generate_similar_queries (line 405) | async def _generate_similar_queries(
    method _reciprocal_rank_fusion_chunks (line 450) | def _reciprocal_rank_fusion_chunks(
    method _reciprocal_rank_fusion_graphs (line 509) | def _reciprocal_rank_fusion_graphs(
    method _hyde_search (line 554) | async def _hyde_search(
    method _fanout_chunk_and_graph_search (line 616) | async def _fanout_chunk_and_graph_search(
    method _vector_search_logic (line 651) | async def _vector_search_logic(
    method _graph_search_logic (line 731) | async def _graph_search_logic(
    method _run_hyde_generation (line 899) | async def _run_hyde_generation(
    method search_documents (line 942) | async def search_documents(
    method completion (line 963) | async def completion(
    method embedding (line 977) | async def embedding(
    method rag (line 985) | async def rag(
    method _find_item_by_shortid (line 1248) | def _find_item_by_shortid(
    method agent (line 1268) | async def agent(
    method get_context (line 1718) | async def get_context(
    method _parse_user_and_collection_filters (line 1774) | def _parse_user_and_collection_filters(
    method _build_documents_context (line 1821) | async def _build_documents_context(
    method _build_aware_system_instruction (line 1872) | async def _build_aware_system_instruction(
    method _perform_web_search (line 1937) | async def _perform_web_search(
  class RetrievalServiceAdapter (line 1989) | class RetrievalServiceAdapter:
    method _parse_user_data (line 1991) | def _parse_user_data(user_data):
    method prepare_search_input (line 2002) | def prepare_search_input(
    method parse_search_input (line 2014) | def parse_search_input(data: dict):
    method prepare_rag_input (line 2024) | def prepare_rag_input(
    method parse_rag_input (line 2042) | def parse_rag_input(data: dict):
    method prepare_agent_input (line 2057) | def prepare_agent_input(
    method parse_agent_input (line 2077) | def parse_agent_input(data: dict):

FILE: py/core/parsers/media/audio_parser.py
  class AudioParser (line 19) | class AudioParser(AsyncParser[bytes]):
    method __init__ (line 22) | def __init__(
    method ingest (line 33) | async def ingest(  # type: ignore

FILE: py/core/parsers/media/bmp_parser.py
  class BMPParser (line 12) | class BMPParser(AsyncParser[str | bytes]):
    method __init__ (line 15) | def __init__(
    method extract_bmp_metadata (line 29) | async def extract_bmp_metadata(self, data: bytes) -> dict:
    method ingest (line 65) | async def ingest(

FILE: py/core/parsers/media/doc_parser.py
  class DOCParser (line 16) | class DOCParser(AsyncParser[str | bytes]):
    method __init__ (line 19) | def __init__(
    method ingest (line 30) | async def ingest(
    method _extract_text (line 76) | def _extract_text(self, word_stream: bytes, table_stream: bytes) -> str:
    method _clean_text (line 94) | def _clean_text(self, text: str) -> list[str]:

FILE: py/core/parsers/media/docx_parser.py
  class DOCXParser (line 15) | class DOCXParser(AsyncParser[str | bytes]):
    method __init__ (line 18) | def __init__(
    method ingest (line 29) | async def ingest(

FILE: py/core/parsers/media/img_parser.py
  class ImageParser (line 22) | class ImageParser(AsyncParser[str | bytes]):
    method __init__ (line 36) | def __init__(
    method _is_heic (line 50) | def _is_heic(self, data: bytes) -> bool:
    method _convert_heic_to_jpeg (line 71) | async def _convert_heic_to_jpeg(self, data: bytes) -> bytes:
    method _convert_tiff_to_jpeg (line 99) | async def _convert_tiff_to_jpeg(self, data: bytes) -> bytes:
    method _is_jpeg (line 117) | def _is_jpeg(self, data: bytes) -> bool:
    method _is_png (line 121) | def _is_png(self, data: bytes) -> bool:
    method _is_bmp (line 126) | def _is_bmp(self, data: bytes) -> bool:
    method _is_tiff (line 130) | def _is_tiff(self, data: bytes) -> bool:
    method _get_image_media_type (line 137) | def _get_image_media_type(
    method ingest (line 186) | async def ingest(

FILE: py/core/parsers/media/odt_parser.py
  class ODTParser (line 14) | class ODTParser(AsyncParser[str | bytes]):
    method __init__ (line 15) | def __init__(
    method ingest (line 27) | async def ingest(

FILE: py/core/parsers/media/pdf_parser.py
  class OCRPDFParser (line 28) | class OCRPDFParser(AsyncParser[str | bytes]):
    method __init__ (line 35) | def __init__(
    method ingest (line 46) | async def ingest(
  class VLMPDFParser (line 73) | class VLMPDFParser(AsyncParser[str | bytes]):
    method __init__ (line 76) | def __init__(
    method process_page (line 96) | async def process_page(self, image, page_num: int) -> dict[str, str]:
    method process_and_yield (line 228) | async def process_and_yield(self, image, page_num: int):
    method ingest (line 237) | async def ingest(
  class BasicPDFParser (line 360) | class BasicPDFParser(AsyncParser[str | bytes]):
    method __init__ (line 363) | def __init__(
    method ingest (line 374) | async def ingest(
  class PDFParserUnstructured (line 415) | class PDFParserUnstructured(AsyncParser[str | bytes]):
    method __init__ (line 416) | def __init__(
    method ingest (line 434) | async def ingest(

FILE: py/core/parsers/media/ppt_parser.py
  class PPTParser (line 16) | class PPTParser(AsyncParser[str | bytes]):
    method __init__ (line 19) | def __init__(
    method _extract_text_from_record (line 30) | def _extract_text_from_record(self, data: bytes) -> str:
    method ingest (line 40) | async def ingest(

FILE: py/core/parsers/media/pptx_parser.py
  class PPTXParser (line 15) | class PPTXParser(AsyncParser[str | bytes]):
    method __init__ (line 18) | def __init__(
    method ingest (line 29) | async def ingest(

FILE: py/core/parsers/media/rtf_parser.py
  class RTFParser (line 14) | class RTFParser(AsyncParser[str | bytes]):
    method __init__ (line 17) | def __init__(
    method ingest (line 28) | async def ingest(

FILE: py/core/parsers/structured/csv_parser.py
  class CSVParser (line 12) | class CSVParser(AsyncParser[str | bytes]):
    method __init__ (line 15) | def __init__(
    method ingest (line 31) | async def ingest(
  class CSVParserAdvanced (line 42) | class CSVParserAdvanced(AsyncParser[str | bytes]):
    method __init__ (line 45) | def __init__(
    method get_delimiter (line 57) | def get_delimiter(
    method ingest (line 73) | async def ingest(

FILE: py/core/parsers/structured/eml_parser.py
  class EMLParser (line 13) | class EMLParser(AsyncParser[str | bytes]):
    method __init__ (line 16) | def __init__(
    method ingest (line 26) | async def ingest(

FILE: py/core/parsers/structured/epub_parser.py
  class EPUBParser (line 17) | class EPUBParser(AsyncParser[str | bytes]):
    method __init__ (line 20) | def __init__(
    method _safe_get_metadata (line 31) | def _safe_get_metadata(self, book, field: str) -> str | None:
    method _clean_text (line 39) | def _clean_text(self, content: bytes) -> str:
    method ingest (line 56) | async def ingest(

FILE: py/core/parsers/structured/json_parser.py
  class JSONParser (line 15) | class JSONParser(AsyncParser[str | bytes]):
    method __init__ (line 18) | def __init__(
    method ingest (line 28) | async def ingest(
    method _parse_json (line 62) | def _parse_json(self, data: dict) -> str:

FILE: py/core/parsers/structured/msg_parser.py
  class MSGParser (line 16) | class MSGParser(AsyncParser[str | bytes]):
    method __init__ (line 19) | def __init__(
    method ingest (line 29) | async def ingest(

FILE: py/core/parsers/structured/org_parser.py
  class ORGParser (line 14) | class ORGParser(AsyncParser[str | bytes]):
    method __init__ (line 17) | def __init__(
    method _process_node (line 28) | def _process_node(self, node) -> list[str]:
    method ingest (line 42) | async def ingest(

FILE: py/core/parsers/structured/p7s_parser.py
  class P7SParser (line 23) | class P7SParser(AsyncParser[str | bytes]):
    method __init__ (line 26) | def __init__(
    method _format_datetime (line 39) | def _format_datetime(self, dt: datetime) -> str:
    method _get_name_attribute (line 43) | def _get_name_attribute(self, name, oid):
    method _extract_cert_info (line 50) | def _extract_cert_info(self, cert) -> dict:
    method _try_parse_signature (line 86) | def _try_parse_signature(self, data: bytes):
    method _extract_p7s_data_from_mime (line 111) | def _extract_p7s_data_from_mime(self, raw_data: bytes) -> bytes:
    method ingest (line 144) | async def ingest(

FILE: py/core/parsers/structured/rst_parser.py
  class RSTParser (line 15) | class RSTParser(AsyncParser[str | bytes]):
    method __init__ (line 18) | def __init__(
    method ingest (line 30) | async def ingest(

FILE: py/core/parsers/structured/tsv_parser.py
  class TSVParser (line 12) | class TSVParser(AsyncParser[str | bytes]):
    method __init__ (line 15) | def __init__(
    method ingest (line 31) | async def ingest(
  class TSVParserAdvanced (line 42) | class TSVParserAdvanced(AsyncParser[str | bytes]):
    method __init__ (line 45) | def __init__(
    method validate_tsv (line 57) | def validate_tsv(self, file: IO[bytes]) -> bool:
    method ingest (line 70) | async def ingest(

FILE: py/core/parsers/structured/xls_parser.py
  class XLSParser (line 16) | class XLSParser(AsyncParser[str | bytes]):
    method __init__ (line 19) | def __init__(
    method ingest (line 30) | async def ingest(
  class XLSParserAdvanced (line 66) | class XLSParserAdvanced(AsyncParser[str | bytes]):
    method __init__ (line 69) | def __init__(
    method connected_components (line 78) | def connected_components(self, arr):
    method get_cell_value (line 89) | def get_cell_value(self, cell, workbook):
    method ingest (line 105) | async def ingest(

FILE: py/core/parsers/structured/xlsx_parser.py
  class XLSXParser (line 17) | class XLSXParser(AsyncParser[str | bytes]):
    method __init__ (line 20) | def __init__(
    method ingest (line 31) | async def ingest(
  class XLSXParserAdvanced (line 44) | class XLSXParserAdvanced(AsyncParser[str | bytes]):
    method __init__ (line 48) | def __init__(
    method connected_components (line 57) | def connected_components(self, arr):
    method ingest (line 72) | async def ingest(

FILE: py/core/parsers/text/css_parser.py
  class CSSParser (line 13) | class CSSParser(AsyncParser[str | bytes]):
    method __init__ (line 16) | def __init__(
    method ingest (line 26) | async def ingest(
    method _process_css_content (line 46) | def _process_css_content(self, css: str) -> str:
    method _extract_comments (line 74) | def _extract_comments(self, css: str) -> list[str]:
    method _extract_rules (line 80) | def _extract_rules(self, css: str) -> list[str]:

FILE: py/core/parsers/text/html_parser.py
  class HTMLParser (line 14) | class HTMLParser(AsyncParser[str | bytes]):
    method __init__ (line 17) | def __init__(
    method ingest (line 27) | async def ingest(

FILE: py/core/parsers/text/js_parser.py
  class JSParser (line 13) | class JSParser(AsyncParser[str | bytes]):
    method __init__ (line 16) | def __init__(
    method ingest (line 26) | async def ingest(
    method _process_js_content (line 46) | def _process_js_content(self, js: str) -> str:
    method _extract_comments (line 101) | def _extract_comments(self, js: str) -> list[str]:
    method _extract_imports_exports (line 126) | def _extract_imports_exports(self, js: str) -> list[str]:
    method _extract_functions (line 147) | def _extract_functions(self, js: str) -> list[str]:
    method _extract_classes (line 183) | def _extract_classes(self, js: str) -> list[str]:
    method _extract_variables (line 206) | def _extract_variables(self, js: str) -> list[str]:
    method _remove_comments (line 224) | def _remove_comments(self, js: str) -> str:

FILE: py/core/parsers/text/md_parser.py
  class MDParser (line 14) | class MDParser(AsyncParser[str | bytes]):
    method __init__ (line 17) | def __init__(
    method ingest (line 31) | async def ingest(

FILE: py/core/parsers/text/python_parser.py
  class PythonParser (line 13) | class PythonParser(AsyncParser[str | bytes]):
    method __init__ (line 16) | def __init__(
    method ingest (line 26) | async def ingest(
    method _process_python_code (line 46) | def _process_python_code(self, code: str) -> str:
    method _extract_module_docstring (line 81) | def _extract_module_docstring(self, code: str) -> str:
    method _extract_imports (line 96) | def _extract_imports(self, lines: list[str]) -> list[str]:
    method _extract_definitions (line 107) | def _extract_definitions(self, code: str) -> list[str]:

FILE: py/core/parsers/text/text_parser.py
  class TextParser (line 12) | class TextParser(AsyncParser[str | bytes]):
    method __init__ (line 15) | def __init__(
    method ingest (line 25) | async def ingest(

FILE: py/core/parsers/text/ts_parser.py
  class TSParser (line 13) | class TSParser(AsyncParser[str | bytes]):
    method __init__ (line 16) | def __init__(
    method ingest (line 26) | async def ingest(
    method _process_ts_code (line 46) | def _process_ts_code(self, code: str) -> str:
    method _extract_file_comment (line 81) | def _extract_file_comment(self, code: str) -> str:
    method _extract_imports_exports (line 97) | def _extract_imports_exports(self, lines: list[str]) -> list[str]:
    method _extract_definitions (line 109) | def _extract_definitions(self, code: str) -> list[str]:

FILE: py/core/providers/auth/clerk.py
  class ClerkAuthProvider (line 19) | class ClerkAuthProvider(JwtAuthProvider):
    method __init__ (line 25) | def __init__(
    method decode_token (line 52) | async def decode_token(self, token: str) -> TokenData:

FILE: py/core/providers/auth/jwt.py
  class JwtAuthProvider (line 26) | class JwtAuthProvider(AuthProvider):
    method __init__ (line 27) | def __init__(
    method login (line 38) | async def login(self, email: str, password: str) -> dict[str, Token]:
    method oauth_callback (line 41) | async def oauth_callback(self, code: str) -> dict[str, Token]:
    method user (line 44) | async def user(self, token: str) -> User:
    method change_password (line 47) | async def change_password(
    method confirm_password_reset (line 52) | async def confirm_password_reset(
    method create_access_token (line 57) | def create_access_token(self, data: dict) -> str:
    method create_refresh_token (line 60) | def create_refresh_token(self, data: dict) -> str:
    method decode_token (line 63) | async def decode_token(self, token: str) -> TokenData:
    method refresh_access_token (line 107) | async def refresh_access_token(
    method get_current_active_user (line 112) | def get_current_active_user(
    method logout (line 120) | async def logout(self, token: str) -> dict[str, str]:
    method register (line 123) | async def register(
    method request_password_reset (line 134) | async def request_password_reset(self, email: str) -> dict[str, str]:
    method send_reset_email (line 137) | async def send_reset_email(self, email: str) -> dict[str, str]:
    method create_user_api_key (line 140) | async def create_user_api_key(
    method verify_email (line 148) | async def verify_email(
    method send_verification_email (line 153) | async def send_verification_email(
    method list_user_api_keys (line 158) | async def list_user_api_keys(self, user_id: UUID) -> list[dict]:
    method delete_user_api_key (line 161) | async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
    method oauth_callback_handler (line 164) | async def oauth_callback_handler(

FILE: py/core/providers/auth/r2r_auth.py
  function normalize_email (line 31) | def normalize_email(email: str) -> str:
  class R2RAuthProvider (line 44) | class R2RAuthProvider(AuthProvider):
    method __init__ (line 45) | def __init__(
    method initialize (line 72) | async def initialize(self):
    method create_access_token (line 85) | def create_access_token(self, data: dict) -> str:
    method create_refresh_token (line 96) | def create_refresh_token(self, data: dict) -> str:
    method decode_token (line 106) | async def decode_token(self, token: str) -> TokenData:
    method authenticate_api_key (line 147) | async def authenticate_api_key(self, api_key: str) -> User:
    method user (line 182) | async def user(self, token: str = Depends(oauth2_scheme)) -> User:
    method get_current_active_user (line 208) | def get_current_active_user(
    method register (line 215) | async def register(
    method send_verification_email (line 283) | async def send_verification_email(
    method verify_email (line 319) | async def verify_email(
    method login (line 333) | async def login(self, email: str, password: str) -> dict[str, Token]:
    method refresh_access_token (line 392) | async def refresh_access_token(
    method change_password (line 419) | async def change_password(
    method request_password_reset (line 464) | async def request_password_reset(self, email: str) -> dict[str, str]:
    method confirm_password_reset (line 501) | async def confirm_password_reset(
    method logout (line 545) | async def logout(self, token: str) -> dict[str, str]:
    method clean_expired_blacklisted_tokens (line 549) | async def clean_expired_blacklisted_tokens(self):
    method send_reset_email (line 552) | async def send_reset_email(self, email: str) -> dict:
    method create_user_api_key (line 563) | async def create_user_api_key(
    method list_user_api_keys (line 589) | async def list_user_api_keys(self, user_id: UUID) -> list[dict]:
    method delete_user_api_key (line 594) | async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
    method rename_api_key (line 600) | async def rename_api_key(
    method oauth_callback_handler (line 609) | async def oauth_callback_handler(

FILE: py/core/providers/auth/supabase.py
  class SupabaseAuthProvider (line 30) | class SupabaseAuthProvider(AuthProvider):
    method __init__ (line 31) | def __init__(
    method initialize (line 56) | async def initialize(self):
    method create_access_token (line 60) | def create_access_token(self, data: dict) -> str:
    method create_refresh_token (line 65) | def create_refresh_token(self, data: dict) -> str:
    method decode_token (line 70) | async def decode_token(self, token: str) -> TokenData:
    method register (line 108) | async def register(
    method send_verification_email (line 129) | async def send_verification_email(
    method verify_email (line 136) | async def verify_email(
    method login (line 147) | async def login(self, email: str, password: str) -> dict[str, Token]:
    method refresh_access_token (line 175) | async def refresh_access_token(
    method user (line 202) | async def user(self, token: str = Depends(oauth2_scheme)) -> User:
    method get_current_active_user (line 225) | def get_current_active_user(
    method change_password (line 233) | async def change_password(
    method request_password_reset (line 251) | async def request_password_reset(self, email: str) -> dict[str, str]:
    method confirm_password_reset (line 287) | async def confirm_password_reset(
    method logout (line 294) | async def logout(self, token: str) -> dict[str, str]:
    method clean_expired_blacklisted_tokens (line 303) | async def clean_expired_blacklisted_tokens(self):
    method send_reset_email (line 307) | async def send_reset_email(self, email: str) -> dict[str, str]:
    method create_user_api_key (line 310) | async def create_user_api_key(
    method list_user_api_keys (line 320) | async def list_user_api_keys(self, user_id: UUID) -> list[dict]:
    method delete_user_api_key (line 325) | async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
    method oauth_callback_handler (line 330) | async def oauth_callback_handler(

FILE: py/core/providers/crypto/bcrypt.py
  class BcryptCryptoConfig (line 20) | class BcryptCryptoConfig(CryptoConfig):
    method supported_providers (line 28) | def supported_providers(self) -> list[str]:
    method validate_config (line 31) | def validate_config(self) -> None:
    method verify_password (line 38) | def verify_password(
  class BCryptCryptoProvider (line 51) | class BCryptCryptoProvider(CryptoProvider, ABC):
    method __init__ (line 52) | def __init__(self, config: BcryptCryptoConfig):
    method get_password_hash (line 73) | def get_password_hash(self, password: str) -> str:
    method verify_password (line 81) | def verify_password(
    method generate_verification_code (line 111) | def generate_verification_code(self, length: int = 32) -> str:
    method generate_signing_keypair (line 115) | def generate_signing_keypair(self) -> Tuple[str, str, str]:
    method sign_request (line 127) | def sign_request(self, private_key: str, data: str) -> str:
    method verify_request_signature (line 138) | def verify_request_signature(
    method generate_api_key (line 150) | def generate_api_key(self) -> Tuple[str, str]:
    method hash_api_key (line 161) | def hash_api_key(self, raw_api_key: str) -> str:
    method verify_api_key (line 169) | def verify_api_key(self, raw_api_key: str, hashed_key: str) -> bool:
    method generate_secure_token (line 173) | def generate_secure_token(self, data: dict, expiry: datetime) -> str:
    method verify_secure_token (line 185) | def verify_secure_token(self, token: str) -> Optional[dict]:

FILE: py/core/providers/crypto/nacl.py
  function encode_bytes_readable (line 21) | def encode_bytes_readable(random_bytes: bytes, chars: str) -> str:
  class NaClCryptoConfig (line 34) | class NaClCryptoConfig(CryptoConfig):
  class NaClCryptoProvider (line 46) | class NaClCryptoProvider(CryptoProvider):
    method __init__ (line 47) | def __init__(self, config: NaClCryptoConfig):
    method get_password_hash (line 64) | def get_password_hash(self, password: str) -> str:
    method verify_password (line 73) | def verify_password(
    method generate_verification_code (line 83) | def generate_verification_code(self, length: int = 32) -> str:
    method generate_api_key (line 87) | def generate_api_key(self) -> Tuple[str, str]:
    method hash_api_key (line 103) | def hash_api_key(self, raw_api_key: str) -> str:
    method verify_api_key (line 111) | def verify_api_key(self, raw_api_key: str, hashed_key: str) -> bool:
    method sign_request (line 119) | def sign_request(self, private_key: str, data: str) -> str:
    method verify_request_signature (line 130) | def verify_request_signature(
    method generate_secure_token (line 142) | def generate_secure_token(self, data: dict, expiry: datetime) -> str:
    method verify_secure_token (line 159) | def verify_secure_token(self, token: str) -> Optional[dict]:
    method generate_signing_keypair (line 172) | def generate_signing_keypair(self) -> Tuple[str, str, str]:

FILE: py/core/providers/database/base.py
  class SemaphoreConnectionPool (line 14) | class SemaphoreConnectionPool:
    method __init__ (line 15) | def __init__(self, connection_string, postgres_configuration_settings):
    method initialize (line 19) | async def initialize(self):
    method get_connection (line 44) | async def get_connection(self):
    method close (line 49) | async def close(self):
  class QueryBuilder (line 53) | class QueryBuilder:
    method __init__ (line 54) | def __init__(self, table_name: str):
    method select (line 68) | def select(self, fields: list[str]):
    method insert (line 72) | def insert(self, data: dict):
    method update (line 77) | def update(self, data: dict):
    method delete (line 82) | def delete(self):
    method where (line 86) | def where(self, condition: str):
    method limit (line 90) | def limit(self, value: Optional[int]):
    method offset (line 94) | def offset(self, value: int):
    method order_by (line 98) | def order_by(self, fields: str):
    method returning (line 102) | def returning(self, fields: list[str]):
    method build (line 106) | def build(self):
  class PostgresConnectionManager (line 151) | class PostgresConnectionManager(DatabaseConnectionManager):
    method __init__ (line 152) | def __init__(self):
    method initialize (line 155) | async def initialize(self, pool: SemaphoreConnectionPool):
    method execute_query (line 158) | async def execute_query(self, query, params=None, isolation_level=None):
    method execute_many (line 174) | async def execute_many(self, query, params=None, batch_size=1000):
    method fetch_query (line 189) | async def fetch_query(self, query, params=None):
    method fetchrow_query (line 218) | async def fetchrow_query(self, query, params=None):
    method transaction (line 229) | async def transaction(self, isolation_level=None):

FILE: py/core/providers/database/chunks.py
  function index_measure_to_ops (line 34) | def index_measure_to_ops(
  function quantize_vector_to_binary (line 41) | def quantize_vector_to_binary(
  class HybridSearchIntermediateResult (line 68) | class HybridSearchIntermediateResult(TypedDict):
  class PostgresChunksHandler (line 75) | class PostgresChunksHandler(Handler):
    method __init__ (line 78) | def __init__(
    method create_tables (line 89) | async def create_tables(self):
    method upsert (line 185) | async def upsert(self, entry: VectorEntry) -> None:
    method upsert_entries (line 255) | async def upsert_entries(self, entries: list[VectorEntry]) -> None:
    method semantic_search (line 327) | async def semantic_search(
    method full_text_search (line 484) | async def full_text_search(
    method hybrid_search (line 538) | async def hybrid_search(
    method delete (line 642) | async def delete(
    method assign_document_chunks_to_collection (line 668) | async def assign_document_chunks_to_collection(
    method remove_document_from_collection_vector (line 680) | async def remove_document_from_collection_vector(
    method delete_user_vector (line 692) | async def delete_user_vector(self, owner_id: UUID) -> None:
    method delete_collection_vector (line 699) | async def delete_collection_vector(self, collection_id: UUID) -> None:
    method list_document_chunks (line 708) | async def list_document_chunks(
    method get_chunk (line 752) | async def get_chunk(self, id: UUID) -> dict:
    method create_index (line 774) | async def create_index(
    method list_indices (line 914) | async def list_indices(
    method delete_index (line 999) | async def delete_index(
    method list_chunks (line 1081) | async def list_chunks(
    method search_documents (line 1149) | async def search_documents(
    method _get_index_options (line 1290) | def _get_index_options(

FILE: py/core/providers/database/collections.py
  class PostgresCollectionsHandler (line 30) | class PostgresCollectionsHandler(Handler):
    method __init__ (line 33) | def __init__(
    method create_tables (line 42) | async def create_tables(self) -> None:
    method collection_exists (line 115) | async def collection_exists(self, collection_id: UUID) -> bool:
    method create_collection (line 126) | async def create_collection(
    method update_collection (line 183) | async def update_collection(
    method delete_collection_relational (line 256) | async def delete_collection_relational(self, collection_id: UUID) -> N...
    method documents_in_collection (line 294) | async def documents_in_collection(
    method get_collections_overview (line 351) | async def get_collections_overview(
    method assign_document_to_collection_relational (line 431) | async def assign_document_to_collection_relational(
    method remove_document_from_collection_relational (line 504) | async def remove_document_from_collection_relational(
    method decrement_collection_document_count (line 539) | async def decrement_collection_document_count(
    method export_to_csv (line 557) | async def export_to_csv(
    method get_collection_by_name (line 677) | async def get_collection_by_name(

FILE: py/core/providers/database/conversations.py
  function _validate_image_size (line 22) | def _validate_image_size(
  function _json_default (line 53) | def _json_default(obj: Any) -> str:
  function safe_dumps (line 69) | def safe_dumps(obj: Any) -> str:
  class PostgresConversationsHandler (line 74) | class PostgresConversationsHandler(Handler):
    method __init__ (line 75) | def __init__(
    method create_tables (line 81) | async def create_tables(self):
    method create_conversation (line 106) | async def create_conversation(
    method get_conversations_overview (line 133) | async def get_conversations_overview(
    method add_message (line 207) | async def add_message(
    method edit_message (line 296) | async def edit_message(
    method update_message_metadata (line 379) | async def update_message_metadata(
    method get_conversation (line 405) | async def get_conversation(
    method update_conversation (line 489) | async def update_conversation(
    method delete_conversation (line 524) | async def delete_conversation(
    method export_conversations_to_csv (line 569) | async def export_conversations_to_csv(
    method export_messages_to_csv (line 671) | async def export_messages_to_csv(

FILE: py/core/providers/database/documents.py
  function transform_filter_fields (line 31) | def transform_filter_fields(filters: dict[str, Any]) -> dict[str, Any]:
  class PostgresDocumentsHandler (line 70) | class PostgresDocumentsHandler(Handler):
    method __init__ (line 73) | def __init__(
    method create_tables (line 82) | async def create_tables(self):
    method upsert_documents_overview (line 198) | async def upsert_documents_overview(
    method delete (line 331) | async def delete(
    method _get_status_from_table (line 347) | async def _get_status_from_table(
    method _get_ids_from_table (line 373) | async def _get_ids_from_table(
    method _set_status_in_table (line 396) | async def _set_status_in_table(
    method _get_status_model (line 420) | def _get_status_model(self, status_type: str):
    method get_workflow_status (line 440) | async def get_workflow_status(
    method set_workflow_status (line 465) | async def set_workflow_status(
    method get_document_ids_by_status (line 486) | async def get_document_ids_by_status(
    method get_documents_overview (line 508) | async def get_documents_overview(
    method update_document_metadata (line 715) | async def update_document_metadata(
    method semantic_document_search (line 753) | async def semantic_document_search(
    method full_text_document_search (line 844) | async def full_text_document_search(
    method hybrid_document_search (line 933) | async def hybrid_document_search(
    method search_documents (line 1025) | async def search_documents(
    method export_to_csv (line 1057) | async def export_to_csv(

FILE: py/core/providers/database/filters.py
  class FilterOperator (line 6) | class FilterOperator:
  class FilterError (line 60) | class FilterError(ValueError):
  class ParamHelper (line 69) | class ParamHelper:
    method __init__ (line 72) | def __init__(self, initial_params: Optional[list[Any]] = None):
    method add (line 76) | def add(self, value: Any) -> str:
  function _process_filter_dict (line 87) | def _process_filter_dict(
  function _process_field_condition (line 183) | def _process_field_condition(
  function _build_array_literal (line 340) | def _build_array_literal(
  function _build_standard_column_condition (line 350) | def _build_standard_column_condition(
  function _build_collection_ids_condition (line 432) | def _build_collection_ids_condition(
  function _build_metadata_condition (line 529) | def _build_metadata_condition(
  function _build_metadata_operator_condition (line 623) | def _build_metadata_operator_condition(
  function apply_filters (line 811) | def apply_filters(

FILE: py/core/providers/database/graphs.py
  class PostgresEntitiesHandler (line 41) | class PostgresEntitiesHandler(Handler):
    method __init__ (line 42) | def __init__(self, *args: Any, **kwargs: Any) -> None:
    method _get_table_name (line 55) | def _get_table_name(self, table: str) -> str:
    method _get_entity_table_for_store (line 59) | def _get_entity_table_for_store(self, store_type: StoreType) -> str:
    method _get_parent_constraint (line 63) | def _get_parent_constraint(self, store_type: StoreType) -> str:
    method create_tables (line 80) | async def create_tables(self) -> None:
    method create (line 113) | async def create(
    method get (line 166) | async def get(
    method update (line 245) | async def update(
    method delete (line 323) | async def delete(
    method get_duplicate_name_blocks (line 375) | async def get_duplicate_name_blocks(
    method merge_duplicate_name_blocks (line 424) | async def merge_duplicate_name_blocks(
    method _insert_merged_entity (line 492) | async def _insert_merged_entity(
    method _create_merged_entity (line 518) | async def _create_merged_entity(self, entities: list[Entity]) -> Entity:
    method export_to_csv (line 563) | async def export_to_csv(
  class PostgresRelationshipsHandler (line 683) | class PostgresRelationshipsHandler(Handler):
    method __init__ (line 684) | def __init__(self, *args: Any, **kwargs: Any) -> None:
    method _get_table_name (line 694) | def _get_table_name(self, table: str) -> str:
    method _get_relationship_table_for_store (line 698) | def _get_relationship_table_for_store(self, store_type: StoreType) -> ...
    method _get_parent_constraint (line 702) | def _get_parent_constraint(self, store_type: StoreType) -> str:
    method create_tables (line 719) | async def create_tables(self) -> None:
    method create (line 762) | async def create(
    method get (line 828) | async def get(
    method update (line 931) | async def update(
    method delete (line 1033) | async def delete(
    method export_to_csv (line 1081) | async def export_to_csv(
  class PostgresCommunitiesHandler (line 1213) | class PostgresCommunitiesHandler(Handler):
    method __init__ (line 1214) | def __init__(self, *args: Any, **kwargs: Any) -> None:
    method create_tables (line 1224) | async def create_tables(self) -> None:
    method create (line 1249) | async def create(
    method update (line 1305) | async def update(
    method delete (line 1385) | async def delete(
    method delete_all_communities (line 1408) | async def delete_all_communities(
    method get (line 1430) | async def get(
    method export_to_csv (line 1501) | async def export_to_csv(
  class PostgresGraphsHandler (line 1632) | class PostgresGraphsHandler(Handler):
    method __init__ (line 1637) | def __init__(
    method create_tables (line 1664) | async def create_tables(self) -> None:
    method create (line 1688) | async def create(
    method reset (line 1736) | async def reset(self, parent_id: UUID) -> None:
    method list_graphs (line 1756) | async def list_graphs(
    method get (line 1832) | async def get(
    method add_documents (line 1874) | async def add_documents(self, id: UUID, document_ids: list[UUID]) -> b...
    method update (line 1927) | async def update(
    method get_entities (line 1985) | async def get_entities(
    method get_relationships (line 2069) | async def get_relationships(
    method add_entities (line 2152) | async def add_entities(
    method get_all_relationships (line 2192) | async def get_all_relationships(
    method has_document (line 2207) | async def has_document(self, graph_id: UUID, document_id: UUID) -> bool:
    method get_communities (line 2239) | async def get_communities(
    method add_community (line 2306) | async def add_community(self, community: Community) -> None:
    method delete (line 2334) | async def delete(self, collection_id: UUID) -> None:
    method perform_graph_clustering (line 2362) | async def perform_graph_clustering(
    method _call_clustering_service (line 2404) | async def _call_clustering_service(
    method _create_graph_and_cluster (line 2439) | async def _create_graph_and_cluster(
    method _cluster_and_add_community_info (line 2450) | async def _cluster_and_add_community_info(
    method get_entity_map (line 2483) | async def get_entity_map(
    method graph_search (line 2549) | async def graph_search(
    method _build_filters (line 2622) | def _build_filters(
    method get_existing_document_entity_chunk_ids (line 2753) | async def get_existing_document_entity_chunk_ids(
    method get_entity_count (line 2766) | async def get_entity_count(
    method update_entity_descriptions (line 2792) | async def update_entity_descriptions(self, entities: list[Entity]):
  function _json_serialize (line 2812) | def _json_serialize(obj):
  function _add_objects (line 2820) | async def _add_objects(

FILE: py/core/providers/database/limits.py
  class PostgresLimitsHandler (line 15) | class PostgresLimitsHandler(Handler):
    method __init__ (line 18) | def __init__(
    method create_tables (line 34) | async def create_tables(self):
    method _count_requests (line 45) | async def _count_requests(
    method _count_monthly_requests (line 78) | async def _count_monthly_requests(
    method determine_effective_limits (line 96) | def determine_effective_limits(
    method check_limits (line 155) | async def check_limits(self, user: User, route: str):
    method log_request (line 212) | async def log_request(self, user_id: UUID, route: str):

FILE: py/core/providers/database/maintenance.py
  class PostgresMaintenanceHandler (line 10) | class PostgresMaintenanceHandler(Handler):
    method __init__ (line 11) | def __init__(
    method create_tables (line 29) | async def create_tables(self):
    method vacuum_table (line 32) | async def vacuum_table(
    method vacuum_all_tables (line 77) | async def vacuum_all_tables(

FILE: py/core/providers/database/postgres.py
  class PostgresDatabaseProvider (line 37) | class PostgresDatabaseProvider(DatabaseProvider):
    method __init__ (line 72) | def __init__(
    method initialize (line 201) | async def initialize(self):
    method schema_exists (line 239) | async def schema_exists(self, schema_name: str) -> bool:
    method _get_postgres_configuration_settings (line 254) | def _get_postgres_configuration_settings(
    method close (line 298) | async def close(self):
    method __aenter__ (line 302) | async def __aenter__(self):
    method __aexit__ (line 306) | async def __aexit__(self, exc_type, exc, tb):

FILE: py/core/providers/database/prompts_handler.py
  class CacheEntry (line 22) | class CacheEntry(Generic[T]):
  class Cache (line 31) | class Cache(Generic[T]):
    method __init__ (line 34) | def __init__(
    method get (line 46) | def get(self, key: str) -> Optional[T]:
    method set (line 63) | def set(self, key: str, value: T) -> None:
    method invalidate (line 75) | def invalidate(self, key: str) -> None:
    method clear (line 79) | def clear(self) -> None:
    method _maybe_cleanup (line 83) | def _maybe_cleanup(self) -> None:
    method _cleanup (line 90) | def _cleanup(self) -> None:
    method _evict_lru (line 102) | def _evict_lru(self) -> None:
  class CacheablePromptHandler (line 113) | class CacheablePromptHandler(Handler):
    method __init__ (line 117) | def __init__(
    method _cache_key (line 127) | def _cache_key(
    method get_cached_prompt (line 137) | async def get_cached_prompt(
    method get_prompt (line 172) | async def get_prompt(  # type: ignore
    method _format_prompt (line 201) | def _format_prompt(
    method update_prompt (line 217) | async def update_prompt(
    method _update_prompt_impl (line 243) | async def _update_prompt_impl(
    method _get_template_info (line 253) | async def _get_template_info(self, prompt_name: str) -> Optional[dict]:
    method _get_prompt_impl (line 258) | async def _get_prompt_impl(
  class PostgresPromptsHandler (line 268) | class PostgresPromptsHandler(CacheablePromptHandler):
    method __init__ (line 271) | def __init__(
    method _load_prompts (line 286) | async def _load_prompts(self) -> None:
    method _load_prompts_from_database (line 294) | async def _load_prompts_from_database(self) -> None:
    method _load_prompts_from_yaml_directory (line 331) | async def _load_prompts_from_yaml_directory(
    method _get_table_name (line 395) | def _get_table_name(self, base_name: str) -> str:
    method _get_prompt_impl (line 400) | async def _get_prompt_impl(
    method _get_template_info (line 445) | async def _get_template_info(self, prompt_name: str) -> Optional[dict]...
    method _update_prompt_impl (line 476) | async def _update_prompt_impl(
    method create_tables (line 538) | async def create_tables(self):
    method add_prompt (line 569) | async def add_prompt(
    method get_all_prompts (line 661) | async def get_all_prompts(self) -> dict[str, Any]:
    method delete_prompt (line 692) | async def delete_prompt(self, name: str) -> None:
    method get_message_payload (line 708) | async def get_message_payload(

FILE: py/core/providers/database/tokens.py
  class PostgresTokensHandler (line 9) | class PostgresTokensHandler(Handler):
    method __init__ (line 12) | def __init__(
    method create_tables (line 17) | async def create_tables(self):
    method blacklist_token (line 31) | async def blacklist_token(
    method is_token_blacklisted (line 45) | async def is_token_blacklisted(self, token: str) -> bool:
    method clean_expired_blacklisted_tokens (line 54) | async def clean_expired_blacklisted_tokens(

FILE: py/core/providers/database/users.py
  function _merge_metadata (line 19) | def _merge_metadata(
  class PostgresUserHandler (line 52) | class PostgresUserHandler(Handler):
    method __init__ (line 56) | def __init__(
    method create_tables (line 65) | async def create_tables(self):
    method get_user_by_id (line 143) | async def get_user_by_id(self, id: UUID) -> User:
    method get_user_by_email (line 195) | async def get_user_by_email(self, email: str) -> User:
    method create_user (line 246) | async def create_user(
    method update_user (line 367) | async def update_user(
    method delete_user_relational (line 507) | async def delete_user_relational(self, id: UUID) -> None:
    method update_user_password (line 550) | async def update_user_password(self, id: UUID, new_hashed_password: str):
    method get_all_users (line 560) | async def get_all_users(self) -> list[User]:
    method store_verification_code (line 614) | async def store_verification_code(
    method verify_user (line 626) | async def verify_user(self, verification_code: str) -> None:
    method remove_verification_code (line 642) | async def remove_verification_code(self, verification_code: str):
    method expire_verification_code (line 650) | async def expire_verification_code(self, id: UUID):
    method store_reset_token (line 658) | async def store_reset_token(
    method get_user_id_by_reset_token (line 670) | async def get_user_id_by_reset_token(
    method remove_reset_token (line 682) | async def remove_reset_token(self, id: UUID):
    method remove_user_from_all_collections (line 690) | async def remove_user_from_all_collections(self, id: UUID):
    method add_user_to_collection (line 698) | async def add_user_to_collection(
    method remove_user_from_collection (line 735) | async def remove_user_from_collection(
    method get_users_in_collection (line 757) | async def get_users_in_collection(
    method mark_user_as_superuser (line 827) | async def mark_user_as_superuser(self, id: UUID):
    method get_user_id_by_verification_code (line 836) | async def get_user_id_by_verification_code(
    method mark_user_as_verified (line 854) | async def mark_user_as_verified(self, id: UUID):
    method get_users_overview (line 864) | async def get_users_overview(
    method _collection_exists (line 952) | async def _collection_exists(self, collection_id: UUID) -> bool:
    method get_user_validation_data (line 963) | async def get_user_validation_data(
    method store_user_api_key (line 1004) | async def store_user_api_key(
    method get_api_key_record (line 1029) | async def get_api_key_record(self, key_id: str) -> Optional[dict]:
    method get_user_api_keys (line 1048) | async def get_user_api_keys(self, user_id: UUID) -> list[dict]:
    method delete_api_key (line 1068) | async def delete_api_key(self, user_id: UUID, key_id: UUID) -> bool:
    method update_api_key_name (line 1083) | async def update_api_key_name(
    method export_to_csv (line 1100) | async def export_to_csv(
    method get_user_by_google_id (line 1220) | async def get_user_by_google_id(self, google_id: str) -> Optional[User]:
    method get_user_by_github_id (line 1274) | async def get_user_by_github_id(self, github_id: str) -> Optional[User]:

FILE: py/core/providers/database/utils.py
  function psql_quote_literal (line 6) | def psql_quote_literal(value: str) -> str:

FILE: py/core/providers/email/console_mock.py
  class ConsoleMockEmailProvider (line 9) | class ConsoleMockEmailProvider(EmailProvider):
    method send_email (line 13) | async def send_email(
    method send_verification_email (line 31) | async def send_verification_email(
    method send_password_reset_email (line 43) | async def send_password_reset_email(
    method send_password_changed_email (line 55) | async def send_password_changed_email(

FILE: py/core/providers/email/mailersend.py
  class MailerSendEmailProvider (line 12) | class MailerSendEmailProvider(EmailProvider):
    method __init__ (line 15) | def __init__(self, config: EmailConfig):
    method _get_base_template_data (line 51) | def _get_base_template_data(self, to_email: str) -> dict:
    method send_email (line 60) | async def send_email(
    method send_verification_email (line 140) | async def send_verification_email(
    method send_password_reset_email (line 193) | async def send_password_reset_email(
    method send_password_changed_email (line 241) | async def send_password_changed_email(

FILE: py/core/providers/email/sendgrid.py
  class SendGridEmailProvider (line 13) | class SendGridEmailProvider(EmailProvider):
    method __init__ (line 16) | def __init__(self, config: EmailConfig):
    method _get_base_template_data (line 50) | def _get_base_template_data(self, to_email: str) -> dict:
    method send_email (line 59) | async def send_email(
    method send_verification_email (line 113) | async def send_verification_email(
    method send_password_reset_email (line 167) | async def send_password_reset_email(
    method send_password_changed_email (line 216) | async def send_password_changed_email(

FILE: py/core/providers/email/smtp.py
  class AsyncSMTPEmailProvider (line 15) | class AsyncSMTPEmailProvider(EmailProvider):
    method __init__ (line 18) | def __init__(self, config: EmailConfig):
    method _send_email_sync (line 47) | async def _send_email_sync(self, msg: MIMEMultipart) -> None:
    method send_email (line 71) | async def send_email(
    method send_verification_email (line 102) | async def send_verification_email(
    method send_password_reset_email (line 128) | async def send_password_reset_email(
    method send_password_changed_email (line 154) | async def send_password_changed_email(

FILE: py/core/providers/embeddings/litellm.py
  class LiteLLMEmbeddingProvider (line 25) | class LiteLLMEmbeddingProvider(EmbeddingProvider):
    method __init__ (line 26) | def __init__(
    method _get_embedding_kwargs (line 67) | def _get_embedding_kwargs(self, **kwargs):
    method _execute_task (line 79) | async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]:
    method _execute_task_sync (line 111) | def _execute_task_sync(self, task: dict[str, Any]) -> list[list[float]]:
    method async_get_embedding (line 137) | async def async_get_embedding(
    method get_embedding (line 155) | def get_embedding(
    method async_get_embeddings (line 173) | async def async_get_embeddings(
    method get_embeddings (line 191) | def get_embeddings(
    method rerank (line 209) | def rerank(
    method arerank (line 258) | async def arerank(

FILE: py/core/providers/embeddings/ollama.py
  class OllamaEmbeddingProvider (line 17) | class OllamaEmbeddingProvider(EmbeddingProvider):
    method __init__ (line 18) | def __init__(self, config: EmbeddingConfig):
    method _get_embedding_kwargs (line 45) | def _get_embedding_kwargs(self, **kwargs):
    method _execute_task (line 52) | async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]:
    method _execute_task_sync (line 68) | def _execute_task_sync(self, task: dict[str, Any]) -> list[list[float]]:
    method async_get_embedding (line 84) | async def async_get_embedding(
    method get_embedding (line 103) | def get_embedding(
    method async_get_embeddings (line 122) | async def async_get_embeddings(
    method get_embeddings (line 140) | def get_embeddings(
    method rerank (line 158) | def rerank(
    method arerank (line 167) | async def arerank(

FILE: py/core/providers/embeddings/openai.py
  class OpenAIEmbeddingProvider (line 21) | class OpenAIEmbeddingProvider(EmbeddingProvider):
    method __init__ (line 33) | def __init__(self, config: EmbeddingConfig):
    method _get_dimensions (line 88) | def _get_dimensions(self):
    method _get_embedding_kwargs (line 96) | def _get_embedding_kwargs(self, **kwargs):
    method _execute_task (line 102) | async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]:
    method _execute_task_sync (line 128) | def _execute_task_sync(self, task: dict[str, Any]) -> list[list[float]]:
    method async_get_embedding (line 153) | async def async_get_embedding(
    method get_embedding (line 172) | def get_embedding(
    method async_get_embeddings (line 191) | async def async_get_embeddings(
    method get_embeddings (line 209) | def get_embeddings(
    method rerank (line 227) | def rerank(
    method arerank (line 236) | async def arerank(
    method tokenize_string (line 245) | def tokenize_string(self, text: str, model: str) -> list[int]:

FILE: py/core/providers/embeddings/utils.py
  function truncate_texts_to_token_limit (line 8) | def truncate_texts_to_token_limit(texts: list[str], model: str) -> list[...

FILE: py/core/providers/file/postgres.py
  class PostgresFileProvider (line 17) | class PostgresFileProvider(FileProvider):
    method __init__ (line 20) | def __init__(
    method _get_table_name (line 31) | def _get_table_name(self, base_name: str) -> str:
    method initialize (line 34) | async def initialize(self) -> None:
    method upsert_file (line 66) | async def upsert_file(
    method store_file (line 90) | async def store_file(
    method _write_lobject (line 112) | async def _write_lobject(
    method retrieve_file (line 137) | async def retrieve_file(
    method retrieve_files_as_zip (line 166) | async def retrieve_files_as_zip(
    method _read_lobject (line 222) | async def _read_lobject(self, conn, oid: int) -> bytes:
    method delete_file (line 266) | async def delete_file(self, document_id: UUID) -> bool:
    method _delete_lobject (line 292) | async def _delete_lobject(self, conn, oid: int) -> None:
    method get_files_overview (line 296) | async def get_files_overview(

FILE: py/core/providers/file/s3.py
  class S3FileProvider (line 17) | class S3FileProvider(FileProvider):
    method __init__ (line 20) | def __init__(self, config: FileConfig):
    method _get_s3_key (line 44) | def _get_s3_key(self, document_id: UUID) -> str:
    method initialize (line 48) | async def initialize(self) -> None:
    method store_file (line 65) | async def store_file(
    method retrieve_file (line 98) | async def retrieve_file(
    method retrieve_files_as_zip (line 137) | async def retrieve_files_as_zip(
    method delete_file (line 197) | async def delete_file(self, document_id: UUID) -> bool:
    method get_files_overview (line 222) | async def get_files_overview(

FILE: py/core/providers/ingestion/r2r/base.py
  class R2RIngestionConfig (line 31) | class R2RIngestionConfig(IngestionConfig):
  class R2RIngestionProvider (line 39) | class R2RIngestionProvider(IngestionProvider):
    method __init__ (line 97) | def __init__(
    method _initialize_parsers (line 125) | def _initialize_parsers(self):
    method _build_text_splitter (line 160) | def _build_text_splitter(
    method validate_config (line 217) | def validate_config(self) -> bool:
    method chunk (line 220) | def chunk(
    method parse (line 244) | async def parse(
    method get_parser_for_document_type (line 440) | def get_parser_for_document_type(self, doc_type: DocumentType) -> Any:

FILE: py/core/providers/ingestion/unstructured/base.py
  class FallbackElement (line 38) | class FallbackElement(R2RSerializable):
  class UnstructuredIngestionConfig (line 43) | class UnstructuredIngestionConfig(IngestionConfig):
    method to_ingestion_request (line 74) | def to_ingestion_request(self):
  class UnstructuredIngestionProvider (line 86) | class UnstructuredIngestionProvider(IngestionProvider):
    method __init__ (line 122) | def __init__(
    method _initialize_parsers (line 179) | def _initialize_parsers(self):
    method parse_fallback (line 217) | async def parse_fallback(
    method parse (line 285) | async def parse(
    method get_parser_for_document_type (line 444) | def get_parser_for_document_type(self, doc_type: DocumentType) -> str:

FILE: py/core/providers/llm/anthropic.py
  function generate_tool_id (line 33) | def generate_tool_id() -> str:
  function process_images_in_message (line 38) | def process_images_in_message(message: dict) -> dict:
  function openai_message_to_anthropic_block (line 84) | def openai_message_to_anthropic_block(msg: dict) -> dict:
  class AnthropicCompletionProvider (line 173) | class AnthropicCompletionProvider(CompletionProvider):
    method __init__ (line 174) | def __init__(self, config: CompletionConfig, *args, **kwargs) -> None:
    method _get_base_args (line 180) | def _get_base_args(
    method _preprocess_messages (line 266) | def _preprocess_messages(self, messages: list[dict]) -> list[dict]:
    method _create_openai_style_message (line 280) | def _create_openai_style_message(self, content_blocks, tool_calls=None):
    method _convert_to_chat_completion (line 330) | def _convert_to_chat_completion(self, anthropic_msg: Message) -> dict:
    method _split_system_messages (line 417) | def _split_system_messages(
    method _execute_task (line 607) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_async_nonstreaming (line 636) | async def _execute_task_async_nonstreaming(
    method _execute_task_async_streaming (line 659) | async def _execute_task_async_streaming(
    method _execute_task_sync (line 698) | def _execute_task_sync(self, task: dict[str, Any]):
    method _execute_task_sync_nonstreaming (line 718) | def _execute_task_sync_nonstreaming(
    method _execute_task_sync_streaming (line 732) | def _execute_task_sync_streaming(
    method _process_stream_event (line 765) | def _process_stream_event(

FILE: py/core/providers/llm/azure_foundry.py
  class AzureFoundryCompletionProvider (line 19) | class AzureFoundryCompletionProvider(CompletionProvider):
    method __init__ (line 20) | def __init__(self, config: CompletionConfig, *args, **kwargs) -> None:
    method _get_base_args (line 48) | def _get_base_args(
    method _execute_task (line 67) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 91) | def _execute_task_sync(self, task: dict[str, Any]):

FILE: py/core/providers/llm/litellm.py
  class LiteLLMCompletionProvider (line 13) | class LiteLLMCompletionProvider(CompletionProvider):
    method __init__ (line 14) | def __init__(self, config: CompletionConfig, *args, **kwargs) -> None:
    method _get_base_args (line 26) | def _get_base_args(
    method _execute_task (line 48) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 63) | def _execute_task_sync(self, task: dict[str, Any]):

FILE: py/core/providers/llm/openai.py
  class OpenAICompletionProvider (line 15) | class OpenAICompletionProvider(CompletionProvider):
    method __init__ (line 16) | def __init__(self, config: CompletionConfig, *args, **kwargs) -> None:
    method _get_client_and_model (line 148) | def _get_client_and_model(self, model: str):
    method _get_async_client_and_model (line 205) | def _get_async_client_and_model(self, model: str):
    method _process_messages_with_images (line 259) | def _process_messages_with_images(
    method _process_array_content_with_images (line 319) | def _process_array_content_with_images(self, content: list) -> list:
    method _preprocess_messages (line 365) | def _preprocess_messages(self, messages: list[dict]) -> list[dict]:
    method _get_base_args (line 393) | def _get_base_args(self, generation_config: GenerationConfig) -> dict:
    method _execute_task (line 425) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 478) | def _execute_task_sync(self, task: dict[str, Any]):

FILE: py/core/providers/llm/r2r_llm.py
  class R2RCompletionProvider (line 15) | class R2RCompletionProvider(CompletionProvider):
    method __init__ (line 25) | def __init__(self, config: CompletionConfig, *args, **kwargs) -> None:
    method _choose_subprovider_by_model (line 49) | def _choose_subprovider_by_model(
    method _execute_task (line 82) | async def _execute_task(self, task: dict[str, Any]):
    method _execute_task_sync (line 90) | def _execute_task_sync(self, task: dict[str, Any]):

FILE: py/core/providers/llm/utils.py
  function resize_base64_image (line 11) | def resize_base64_image(
  function estimate_image_tokens (line 95) | def estimate_image_tokens(width: int, height: int) -> int:

FILE: py/core/providers/ocr/mistral.py
  class MistralOCRProvider (line 13) | class MistralOCRProvider(OCRProvider):
    method __init__ (line 14) | def __init__(self, config: OCRConfig) -> None:
    method _execute_task (line 31) | async def _execute_task(self, task: dict[str, Any]) -> OCRResponse:
    method _execute_task_sync (line 43) | def _execute_task_sync(self, task: dict[str, Any]) -> OCRResponse:
    method upload_file (line 55) | async def upload_file(
    method process_file (line 89) | async def process_file(
    method process_url (line 121) | async def process_url(
    method process_pdf (line 153) | async def process_pdf(

FILE: py/core/providers/orchestration/hatchet.py
  class HatchetOrchestrationProvider (line 11) | class HatchetOrchestrationProvider(OrchestrationProvider):
    method __init__ (line 12) | def __init__(self, config: OrchestrationConfig):
    method workflow (line 31) | def workflow(self, *args, **kwargs) -> Callable:
    method step (line 34) | def step(self, *args, **kwargs) -> Callable:
    method failure (line 37) | def failure(self, *args, **kwargs) -> Callable:
    method get_worker (line 40) | def get_worker(self, name: str, max_runs: Optional[int] = None) -> Any:
    method concurrency (line 46) | def concurrency(self, *args, **kwargs) -> Callable:
    method start_worker (line 49) | async def start_worker(self):
    method run_workflow (line 57) | async def run_workflow(
    method register_workflows (line 79) | def register_workflows(

FILE: py/core/providers/orchestration/simple.py
  class SimpleOrchestrationProvider (line 6) | class SimpleOrchestrationProvider(OrchestrationProvider):
    method __init__ (line 7) | def __init__(self, config: OrchestrationConfig):
    method start_worker (line 12) | async def start_worker(self):
    method get_worker (line 15) | def get_worker(self, name: str, max_runs: int) -> Any:
    method step (line 18) | def step(self, *args, **kwargs) -> Any:
    method workflow (line 21) | def workflow(self, *args, **kwargs) -> Any:
    method failure (line 24) | def failure(self, *args, **kwargs) -> Any:
    method register_workflows (line 27) | def register_workflows(
    method run_workflow (line 47) | async def run_workflow(

FILE: py/core/providers/scheduler/apscheduler.py
  class APSchedulerProvider (line 10) | class APSchedulerProvider(SchedulerProvider):
    method __init__ (line 13) | def __init__(self, config: SchedulerConfig):
    method add_job (line 17) | async def add_job(self, func, trigger, **kwargs):
    method start (line 23) | async def start(self):
    method shutdown (line 27) | async def shutdown(self):
    method __aenter__ (line 32) | async def __aenter__(self):
    method __aexit__ (line 36) | async def __aexit__(self, exc_type, exc, tb):

FILE: py/core/utils/__init__.py
  function extract_citations (line 29) | def extract_citations(text: str) -> list[str]:
  function extract_citation_spans (line 55) | def extract_citation_spans(text: str) -> dict[str, list[Tuple[int, int]]]:
  class CitationTracker (line 90) | class CitationTracker:
    method __init__ (line 102) | def __init__(self):
    method is_new_citation (line 110) | def is_new_citation(self, citation_id: str) -> bool:
    method is_new_span (line 129) | def is_new_span(self, citation_id: str, span: Tuple[int, int]) -> bool:
    method get_all_spans (line 158) | def get_all_spans(self) -> dict[str, list[Tuple[int, int]]]:
    method reset (line 169) | def reset(self) -> None:
  function find_new_citation_spans (line 178) | def find_new_citation_spans(

FILE: py/core/utils/context.py
  function get_current_project_schema (line 8) | def get_current_project_schema() -> str | None:
  function set_project_schema (line 13) | def set_project_schema(schema_name: str) -> Token:

FILE: py/core/utils/logging_config.py
  class HTTPStatusFilter (line 9) | class HTTPStatusFilter(logging.Filter):
    method filter (line 33) | def filter(self, record: logging.LogRecord) -> bool:
  function configure_logging (line 162) | def configure_logging() -> Path:

FILE: py/core/utils/sentry.py
  function init_sentry (line 7) | def init_sentry():

FILE: py/core/utils/serper.py
  function process_json (line 11) | def process_json(json_object, indent=0):
  class SerperClient (line 35) | class SerperClient:
    method __init__ (line 36) | def __init__(self, api_base: str = "google.serper.dev") -> None:
    method _extract_results (line 50) | def _extract_results(result_data: dict) -> list:
    method get_raw (line 75) | def get_raw(self, query: str, limit: int = 10) -> list:

FILE: py/migrations/env.py
  function get_schema_name (line 23) | def get_schema_name():
  function include_object (line 28) | def include_object(object, name, type_, reflected, compare_to):
  function run_migrations_offline (line 36) | def run_migrations_offline() -> None:
  function run_migrations_online (line 58) | def run_migrations_online() -> None:

FILE: py/migrations/versions/2fac23e4d91b_migrate_to_document_search.py
  class Vector (line 40) | class Vector(UserDefinedType):
    method get_col_spec (line 41) | def get_col_spec(self, **kw):
  function run_async (line 45) | def run_async(coroutine):
  function async_generate_all_summaries (line 51) | async def async_generate_all_summaries():
  function generate_all_summaries (line 183) | def generate_all_summaries():
  function check_if_upgrade_needed (line 188) | def check_if_upgrade_needed():
  function upgrade (line 221) | def upgrade() -> None:
  function downgrade (line 294) | def downgrade() -> None:

FILE: py/migrations/versions/3efc7b3b1b3d_add_total_tokens_count.py
  function count_tokens_for_text (line 29) | def count_tokens_for_text(text: str, model: str = "gpt-3.5-turbo") -> int:
  function check_if_upgrade_needed (line 43) | def check_if_upgrade_needed() -> bool:
  function upgrade (line 71) | def upgrade() -> None:
  function downgrade (line 167) | def downgrade() -> None:

FILE: py/migrations/versions/7eb70560f406_add_limits_overrides_to_users.py
  function check_if_upgrade_needed (line 24) | def check_if_upgrade_needed():
  function upgrade (line 51) | def upgrade() -> None:
  function downgrade (line 63) | def downgrade() -> None:

FILE: py/migrations/versions/8077140e1e99_v3_api_database_revision.py
  function check_if_upgrade_needed (line 28) | def check_if_upgrade_needed():
  function upgrade (line 56) | def upgrade() -> None:
  function downgrade (line 219) | def downgrade() -> None:

FILE: py/migrations/versions/c45a9cf6a8a4_add_user_and_document_count_to_.py
  function check_if_upgrade_needed (line 28) | def check_if_upgrade_needed():
  function upgrade (line 48) | def upgrade():
  function downgrade (line 87) | def downgrade():

FILE: py/migrations/versions/d342e632358a_migrate_to_asyncpg.py
  class Vector (line 29) | class Vector(UserDefinedType):
    method get_col_spec (line 30) | def get_col_spec(self, **kw):
  function check_if_upgrade_needed (line 34) | def check_if_upgrade_needed():
  function upgrade (line 66) | def upgrade() -> None:
  function downgrade (line 177) | def downgrade() -> None:

FILE: py/r2r/__init__.py
  function get_version (line 18) | def get_version():

FILE: py/r2r/mcp.py
  function id_to_shorthand (line 5) | def id_to_shorthand(id: str) -> str:
  function format_search_results_for_llm (line 9) | def format_search_results_for_llm(
  function search (line 107) | async def search(query: str) -> str:
  function rag (line 128) | async def rag(query: str) -> str:

FILE: py/r2r/serve.py
  function create_app (line 22) | async def create_app(
  function run_server (line 61) | def run_server(
  function main (line 103) | def main():

FILE: py/sdk/asnyc_methods/chunks.py
  class ChunksSDK (line 15) | class ChunksSDK:
    method __init__ (line 18) | def __init__(self, client):
    method update (line 21) | async def update(
    method retrieve (line 43) | async def retrieve(
    method list_by_document (line 65) | async def list_by_document(
    method delete (line 99) | async def delete(
    method list (line 119) | async def list(
    method search (line 158) | async def search(

FILE: py/sdk/asnyc_methods/collections.py
  class CollectionsSDK (line 14) | class CollectionsSDK:
    method __init__ (line 15) | def __init__(self, client):
    method create (line 18) | async def create(
    method list (line 42) | async def list(
    method retrieve (line 74) | async def retrieve(
    method update (line 92) | async def update(
    method delete (line 127) | async def delete(
    method list_documents (line 145) | async def list_documents(
    method add_document (line 175) | async def add_document(
    method remove_document (line 197) | async def remove_document(
    method list_users (line 219) | async def list_users(
    method add_user (line 246) | async def add_user(
    method remove_user (line 266) | async def remove_user(
    method extract (line 288) | async def extract(
    method retrieve_by_name (line 321) | async def retrieve_by_name(

FILE: py/sdk/asnyc_methods/conversations.py
  class ConversationsSDK (line 17) | class ConversationsSDK:
    method __init__ (line 18) | def __init__(self, client):
    method create (line 21) | async def create(
    method list (line 44) | async def list(
    method retrieve (line 76) | async def retrieve(
    method update (line 96) | async def update(
    method delete (line 123) | async def delete(
    method add_message (line 143) | async def add_message(
    method update_message (line 181) | async def update_message(
    method export (line 211) | async def export(
    method export_messages (line 262) | async def export_messages(

FILE: py/sdk/asnyc_methods/documents.py
  class DocumentsSDK (line 35) | class DocumentsSDK:
    method __init__ (line 38) | def __init__(self, client):
    method create (line 41) | async def create(
    method append_metadata (line 185) | async def append_metadata(
    method replace_metadata (line 209) | async def replace_metadata(
    method retrieve (line 233) | async def retrieve(
    method download (line 253) | async def download(
    method download_zip (line 276) | async def download_zip(
    method export (line 326) | async def export(
    method export_entities (line 376) | async def export_entities(
    method export_relationships (line 428) | async def export_relationships(
    method delete (line 480) | async def delete(
    method list_chunks (line 500) | async def list_chunks(
    method list_collections (line 532) | async def list_collections(
    method delete_by_filter (line 562) | async def delete_by_filter(
    method extract (line 584) | async def extract(
    method list_entities (line 615) | async def list_entities(
    method list_relationships (line 647) | async def list_relationships(
    method list (line 685) | async def list(
    method search (line 723) | async def search(
    method deduplicate (line 757) | async def deduplicate(

FILE: py/sdk/asnyc_methods/graphs.py
  class GraphsSDK (line 19) | class GraphsSDK:
    method __init__ (line 22) | def __init__(self, client):
    method list (line 25) | async def list(
    method retrieve (line 54) | async def retrieve(
    method reset (line 72) | async def reset(
    method update (line 95) | async def update(
    method list_entities (line 126) | async def list_entities(
    method get_entity (line 156) | async def get_entity(
    method remove_entity (line 178) | async def remove_entity(
    method list_relationships (line 198) | async def list_relationships(
    method get_relationship (line 228) | async def get_relationship(
    method remove_relationship (line 250) | async def remove_relationship(
    method build (line 272) | async def build(
    method list_communities (line 302) | async def list_communities(
    method get_community (line 332) | async def get_community(
    method update_community (line 354) | async def update_community(
    method delete_community (line 407) | async def delete_community(
    method pull (line 429) | async def pull(
    method remove_document (line 462) | async def remove_document(
    method create_entity (line 486) | async def create_entity(
    method create_relationship (line 524) | async def create_relationship(
    method create_community (line 574) | async def create_community(

FILE: py/sdk/asnyc_methods/indices.py
  class IndicesSDK (line 11) | class IndicesSDK:
    method __init__ (line 12) | def __init__(self, client):
    method create (line 15) | async def create(
    method list (line 42) | async def list(
    method retrieve (line 74) | async def retrieve(
    method delete (line 96) | async def delete(

FILE: py/sdk/asnyc_methods/prompts.py
  class PromptsSDK (line 12) | class PromptsSDK:
    method __init__ (line 13) | def __init__(self, client):
    method create (line 16) | async def create(
    method list (line 42) | async def list(self) -> WrappedPromptsResponse:
    method retrieve (line 56) | async def retrieve(
    method update (line 85) | async def update(
    method delete (line 114) | async def delete(self, name: str) -> WrappedBooleanResponse:

FILE: py/sdk/asnyc_methods/retrieval.py
  class RetrievalSDK (line 29) | class RetrievalSDK:
    method __init__ (line 32) | def __init__(self, client):
    method search (line 35) | async def search(
    method completion (line 70) | async def completion(
    method embedding (line 106) | async def embedding(self, text: str) -> WrappedEmbeddingResponse:
    method rag (line 128) | async def rag(
    method agent (line 209) | async def agent(

FILE: py/sdk/asnyc_methods/system.py
  class SystemSDK (line 8) | class SystemSDK:
    method __init__ (line 9) | def __init__(self, client):
    method health (line 12) | async def health(self) -> WrappedGenericMessageResponse:
    method settings (line 20) | async def settings(self) -> WrappedSettingsResponse:
    method status (line 32) | async def status(self) -> WrappedServerStatsResponse:

FILE: py/sdk/asnyc_methods/users.py
  class UsersSDK (line 18) | class UsersSDK:
    method __init__ (line 19) | def __init__(self, client):
    method create (line 22) | async def create(
    method send_verification_email (line 64) | async def send_verification_email(
    method delete (line 77) | async def delete(
    method verify_email (line 102) | async def verify_email(
    method login (line 127) | async def login(self, email: str, password: str) -> WrappedLoginResponse:
    method logout (line 164) | async def logout(self) -> WrappedGenericMessageResponse | None:
    method refresh_token (line 181) | async def refresh_token(self) -> WrappedTokenResponse:
    method change_password (line 199) | async def change_password(
    method request_password_reset (line 224) | async def request_password_reset(
    method reset_password (line 244) | async def reset_password(
    method list (line 269) | async def list(
    method retrieve (line 300) | async def retrieve(
    method me (line 320) | async def me(
    method update (line 336) | async def update(
    method list_collections (line 385) | async def list_collections(
    method add_to_collection (line 415) | async def add_to_collection(
    method remove_from_collection (line 434) | async def remove_from_collection(
    method create_api_key (line 456) | async def create_api_key(
    method list_api_keys (line 487) | async def list_api_keys(
    method delete_api_key (line 507) | async def delete_api_key(
    method get_limits (line 529) | async def get_limits(self) -> WrappedLimitsResponse:
    method oauth_google_authorize (line 538) | async def oauth_google_authorize(self) -> WrappedGenericMessageResponse:
    method oauth_github_authorize (line 552) | async def oauth_github_authorize(self) -> WrappedGenericMessageResponse:
    method oauth_google_callback (line 566) | async def oauth_google_callback(
    method oauth_github_callback (line 580) | async def oauth_github_callback(

FILE: py/sdk/async_client.py
  class R2RAsyncClient (line 25) | class R2RAsyncClient(BaseClient):
    method __init__ (line 28) | def __init__(
    method _make_request (line 47) | async def _make_request(
    method _make_streaming_request (line 72) | async def _make_streaming_request(
    method _handle_response (line 88) | async def _handle_response(self, response: Response) -> None:
    method close (line 111) | async def close(self):
    method __aenter__ (line 114) | async def __aenter__(self):
    method __aexit__ (line 117) | async def __aexit__(self, exc_type, exc_val, exc_tb):
    method set_api_key (line 120) | def set_api_key(self, api_key: str) -> None:
    method unset_api_key (line 125) | def unset_api_key(self) -> None:
    method set_base_url (line 128) | def set_base_url(self, base_url: str) -> None:
    method set_project_name (line 131) | def set_project_name(self, project_name: str | None) -> None:
    method unset_project_name (line 134) | def unset_project_name(self) -> None:

FILE: py/sdk/base/base_client.py
  class BaseClient (line 6) | class BaseClient:
    method __init__ (line 7) | def __init__(
    method _get_auth_header (line 22) | def _get_auth_header(self) -> dict[str, str]:
    method _get_full_url (line 34) | def _get_full_url(self, endpoint: str, version: str = "v3") -> str:
    method _prepare_request_args (line 37) | def _prepare_request_args(self, endpoint: str, **kwargs) -> dict:

FILE: py/sdk/sync_client.py
  class R2RClient (line 24) | class R2RClient(BaseClient):
    method __init__ (line 25) | def __init__(
    method _make_request (line 44) | def _make_request(
    method _make_streaming_request (line 70) | def _make_streaming_request(
    method _handle_response (line 126) | def _handle_response(self, response: Response) -> None:
    method set_api_key (line 149) | def set_api_key(self, api_key: str) -> None:
    method unset_api_key (line 154) | def unset_api_key(self) -> None:
    method set_base_url (line 157) | def set_base_url(self, base_url: str) -> None:
    method set_project_name (line 160) | def set_project_name(self, project_name: str | None) -> None:
    method unset_project_name (line 163) | def unset_project_name(self) -> None:

FILE: py/sdk/sync_methods/chunks.py
  class ChunksSDK (line 15) | class ChunksSDK:
    method __init__ (line 18) | def __init__(self, client):
    method update (line 21) | def update(
    method retrieve (line 43) | def retrieve(
    method list_by_document (line 65) | def list_by_document(
    method delete (line 99) | def delete(
    method list (line 119) | def list(
    method search (line 158) | def search(

FILE: py/sdk/sync_methods/collections.py
  class CollectionsSDK (line 14) | class CollectionsSDK:
    method __init__ (line 15) | def __init__(self, client):
    method create (line 18) | def create(
    method list (line 42) | def list(
    method retrieve (line 74) | def retrieve(
    method update (line 92) | def update(
    method delete (line 127) | def delete(
    method list_documents (line 145) | def list_documents(
    method add_document (line 175) | def add_document(
    method remove_document (line 197) | def remove_document(
    method list_users (line 219) | def list_users(
    method add_user (line 246) | def add_user(
    method remove_user (line 266) | def remove_user(
    method extract (line 288) | def extract(
    method retrieve_by_name (line 321) | def retrieve_by_name(

FILE: py/sdk/sync_methods/conversations.py
  class ConversationsSDK (line 15) | class ConversationsSDK:
    method __init__ (line 16) | def __init__(self, client):
    method create (line 19) | def create(
    method list (line 41) | def list(
    method retrieve (line 73) | def retrieve(
    method update (line 93) | def update(
    method delete (line 120) | def delete(
    method add_message (line 140) | def add_message(
    method update_message (line 178) | def update_message(
    method export (line 208) | def export(
    method export_messages (line 259) | def export_messages(

FILE: py/sdk/sync_methods/documents.py
  class DocumentsSDK (line 34) | class DocumentsSDK:
    method __init__ (line 37) | def __init__(self, client):
    method create (line 40) | def create(
    method append_metadata (line 184) | def append_metadata(
    method replace_metadata (line 208) | def replace_metadata(
    method retrieve (line 232) | def retrieve(
    method download (line 252) | def download(
    method download_zip (line 275) | def download_zip(
    method export (line 325) | def export(
    method export_entities (line 373) | def export_entities(
    method export_relationships (line 425) | def export_relationships(
    method delete (line 477) | def delete(
    method list_chunks (line 497) | def list_chunks(
    method list_collections (line 529) | def list_collections(
    method delete_by_filter (line 559) | def delete_by_filter(
    method extract (line 581) | def extract(
    method list_entities (line 612) | def list_entities(
    method list_relationships (line 644) | def list_relationships(
    method list (line 682) | def list(
    method search (line 720) | def search(
    method deduplicate (line 755) | def deduplicate(

FILE: py/sdk/sync_methods/graphs.py
  class GraphsSDK (line 19) | class GraphsSDK:
    method __init__ (line 22) | def __init__(self, client):
    method list (line 25) | def list(
    method retrieve (line 54) | def retrieve(
    method reset (line 72) | def reset(
    method update (line 95) | def update(
    method list_entities (line 126) | def list_entities(
    method get_entity (line 156) | def get_entity(
    method remove_entity (line 178) | def remove_entity(
    method list_relationships (line 200) | def list_relationships(
    method get_relationship (line 230) | def get_relationship(
    method remove_relationship (line 252) | def remove_relationship(
    method build (line 274) | def build(
    method list_communities (line 304) | def list_communities(
    method get_community (line 334) | def get_community(
    method update_community (line 356) | def update_community(
    method delete_community (line 409) | def delete_community(
    method pull (line 431) | def pull(
    method remove_document (line 464) | def remove_document(
    method create_entity (line 488) | def create_entity(
    method create_relationship (line 526) | def create_relationship(
    method create_community (line 576) | def create_community(

FILE: py/sdk/sync_methods/indices.py
  class IndicesSDK (line 11) | class IndicesSDK:
    method __init__ (line 12) | def __init__(self, client):
    method create (line 15) | def create(
    method list (line 45) | def list(
    method retrieve (line 77) | def retrieve(
    method delete (line 99) | def delete(

FILE: py/sdk/sync_methods/prompts.py
  class PromptsSDK (line 12) | class PromptsSDK:
    method __init__ (line 13) | def __init__(self, client):
    method create (line 16) | def create(
    method list (line 42) | def list(self) -> WrappedPromptsResponse:
    method retrieve (line 56) | def retrieve(
    method update (line 85) | def update(
    method delete (line 114) | def delete(self, name: str) -> WrappedBooleanResponse:

FILE: py/sdk/sync_methods/retrieval.py
  function parse_retrieval_event (line 40) | def parse_retrieval_event(raw: dict) -> Optional[AgentEvent]:
  class RetrievalSDK (line 150) | class RetrievalSDK:
    method __init__ (line 153) | def __init__(self, client):
    method search (line 156) | def search(
    method completion (line 191) | def completion(
    method embedding (line 228) | def embedding(self, text: str) -> WrappedEmbeddingResponse:
    method rag (line 250) | def rag(
    method agent (line 329) | def agent(

FILE: py/sdk/sync_methods/system.py
  class SystemSDK (line 8) | class SystemSDK:
    method __init__ (line 9) | def __init__(self, client):
    method health (line 12) | def health(self) -> WrappedGenericMessageResponse:
    method settings (line 20) | def settings(self) -> WrappedSettingsResponse:
    method status (line 32) | def status(self) -> WrappedServerStatsResponse:

FILE: py/sdk/sync_methods/users.py
  class UsersSDK (line 18) | class UsersSDK:
    method __init__ (line 19) | def __init__(self, client):
    method create (line 22) | def create(
    method send_verification_email (line 64) | def send_verification_email(
    method delete (line 77) | def delete(self, id: str | UUID, password: str) -> WrappedBooleanRespo...
    method verify_email (line 100) | def verify_email(
    method login (line 125) | def login(self, email: str, password: str) -> WrappedLoginResponse:
    method logout (line 162) | def logout(self) -> WrappedGenericMessageResponse | None:
    method refresh_token (line 179) | def refresh_token(self) -> WrappedTokenResponse:
    method change_password (line 198) | def change_password(
    method request_password_reset (line 223) | def request_password_reset(
    method reset_password (line 243) | def reset_password(
    method list (line 268) | def list(
    method retrieve (line 299) | def retrieve(
    method me (line 319) | def me(
    method update (line 335) | def update(
    method list_collections (line 384) | def list_collections(
    method add_to_collection (line 414) | def add_to_collection(
    method remove_from_collection (line 433) | def remove_from_collection(
    method create_api_key (line 455) | def create_api_key(
    method list_api_keys (line 486) | def list_api_keys(
    method delete_api_key (line 506) | def delete_api_key(
    method get_limits (line 528) | def get_limits(self) -> WrappedLimitsResponse:
    method oauth_google_authorize (line 537) | def oauth_google_authorize(self) -> WrappedGenericMessageResponse:
    method oauth_github_authorize (line 551) | def oauth_github_authorize(self) -> WrappedGenericMessageResponse:
    method oauth_google_callback (line 564) | def oauth_google_callback(
    method oauth_github_callback (line 578) | def oauth_github_callback(

FILE: py/shared/abstractions/base.py
  class R2RSerializable (line 13) | class R2RSerializable(BaseModel):
    method from_dict (line 15) | def from_dict(cls: Type[T], data: dict[str, Any] | str) -> T:
    method as_dict (line 25) | def as_dict(self) -> dict[str, Any]:
    method to_dict (line 29) | def to_dict(self) -> dict[str, Any]:
    method to_json (line 33) | def to_json(self) -> str:
    method from_json (line 38) | def from_json(cls: Type[T], json_str: str) -> T:
    method _serialize_values (line 42) | def _serialize_values(data: Any) -> Any:
    class Config (line 59) | class Config:
  class AsyncSyncMeta (line 67) | class AsyncSyncMeta(type):
    method get_event_loop (line 71) | def get_event_loop(cls):
    method __new__ (line 77) | def __new__(cls, name, bases, dct):
  function syncable (line 142) | def syncable(func):

FILE: py/shared/abstractions/document.py
  class DocumentType (line 18) | class DocumentType(str, Enum):
  class Document (line 95) | class Document(R2RSerializable):
    class Config (line 102) | class Config:
  class IngestionStatus (line 111) | class IngestionStatus(str, Enum):
    method __str__ (line 126) | def __str__(self):
    method table_name (line 130) | def table_name(cls) -> str:
    method id_column (line 134) | def id_column(cls) -> str:
  class GraphExtractionStatus (line 138) | class GraphExtractionStatus(str, Enum):
    method __str__ (line 147) | def __str__(self):
    method table_name (line 151) | def table_name(cls) -> str:
    method id_column (line 155) | def id_column(cls) -> str:
  class GraphConstructionStatus (line 159) | class GraphConstructionStatus(str, Enum):
    method __str__ (line 168) | def __str__(self):
    method table_name (line 172) | def table_name(cls) -> str:
    method id_column (line 176) | def id_column(cls) -> str:
  class DocumentResponse (line 180) | class DocumentResponse(R2RSerializable):
    method convert_to_db_entry (line 201) | def convert_to_db_entry(self):
    class Config (line 230) | class Config:
  class UnprocessedChunk (line 253) | class UnprocessedChunk(R2RSerializable):
  class UpdateChunk (line 263) | class UpdateChunk(R2RSerializable):
  class DocumentChunk (line 271) | class DocumentChunk(R2RSerializable):
  class RawChunk (line 282) | class RawChunk(R2RSerializable):
  class IngestionMode (line 286) | class IngestionMode(str, Enum):
  class ChunkEnrichmentSettings (line 293) | class ChunkEnrichmentSettings(R2RSerializable):
  class IngestionConfig (line 314) | class IngestionConfig(R2RSerializable):
    method supported_providers (line 337) | def supported_providers(self) -> list[str]:
    method validate_config (line 340) | def validate_config(self) -> None:
    method get_default (line 345) | def get_default(cls, mode: str) -> "IngestionConfig":

FILE: py/shared/abstractions/exception.py
  class R2RException (line 6) | class R2RException(Exception):
    method __init__ (line 7) | def __init__(
    method to_dict (line 14) | def to_dict(self):
  class R2RClientException (line 23) | class R2RClientException(R2RException):
    method __init__ (line 26) | def __init__(
    method to_dict (line 35) | def to_dict(self):
  class R2RDocumentProcessingError (line 41) | class R2RDocumentProcessingError(R2RException):
    method __init__ (line 42) | def __init__(
    method to_dict (line 51) | def to_dict(self):
  class PDFParsingError (line 57) | class PDFParsingError(R2RException):
    method __init__ (line 60) | def __init__(
  class PopplerNotFoundError (line 72) | class PopplerNotFoundError(PDFParsingError):
    method __init__ (line 75) | def __init__(self):

FILE: py/shared/abstractions/graph.py
  class Entity (line 14) | class Entity(R2RSerializable):
    method __str__ (line 27) | def __str__(self):
    method __init__ (line 30) | def __init__(self, **kwargs):
  class Relationship (line 39) | class Relationship(R2RSerializable):
    method __init__ (line 59) | def __init__(self, **kwargs):
  class Community (line 69) | class Community(R2RSerializable):
    method __init__ (line 88) | def __init__(self, **kwargs):
    method from_dict (line 98) | def from_dict(cls, data: dict[str, Any] | str) -> "Community":
  class GraphExtraction (line 107) | class GraphExtraction(R2RSerializable):
  class Graph (line 114) | class Graph(R2RSerializable):
    class Config (line 126) | class Config:
    method from_dict (line 131) | def from_dict(cls, data: dict[str, Any] | str) -> "Graph":
    method __init__ (line 139) | def __init__(self, **kwargs):
  class StoreType (line 143) | class StoreType(str, Enum):
  class GraphCreationSettings (line 148) | class GraphCreationSettings(R2RSerializable):
  class GraphEnrichmentSettings (line 200) | class GraphEnrichmentSettings(R2RSerializable):
  class GraphCommunitySettings (line 230) | class GraphCommunitySettings(R2RSerializable):

FILE: py/shared/abstractions/llm.py
  class Function (line 18) | class Function(BaseModel):
  class ChatCompletionMessageToolCall (line 31) | class ChatCompletionMessageToolCall(BaseModel):
  class FunctionCall (line 42) | class FunctionCall(BaseModel):
  class ChatCompletionMessage (line 55) | class ChatCompletionMessage(BaseModel):
  class Choice (line 85) | class Choice(BaseModel):
  class LLMChatCompletion (line 113) | class LLMChatCompletion(BaseModel):
  class RAGCompletion (line 149) | class RAGCompletion:
    method __init__ (line 153) | def __init__(
  class GenerationConfig (line 162) | class GenerationConfig(R2RSerializable):
    method set_default (line 231) | def set_default(cls, **kwargs):
    method __init__ (line 240) | def __init__(self, **data):
    method __str__ (line 270) | def __str__(self):
    class Config (line 273) | class Config:
  class MessageType (line 290) | class MessageType(Enum):
    method __str__ (line 297) | def __str__(self):
  class Message (line 301) | class Message(R2RSerializable):
    class Config (line 315) | class Config:

FILE: py/shared/abstractions/prompt.py
  class Prompt (line 13) | class Prompt(BaseModel):
    method format_prompt (line 23) | def format_prompt(self, inputs: dict[str, Any]) -> str:
    method _validate_inputs (line 27) | def _validate_inputs(self, inputs: dict[str, Any]) -> None:
    method _convert_type (line 37) | def _convert_type(self, type_name: str) -> type:

FILE: py/shared/abstractions/search.py
  function generate_id_from_label (line 16) | def generate_id_from_label(label) -> UUID:
  class ChunkSearchResult (line 20) | class ChunkSearchResult(R2RSerializable):
    method __str__ (line 31) | def __str__(self) -> str:
    method __repr__ (line 39) | def __repr__(self) -> str:
    method as_dict (line 42) | def as_dict(self) -> dict:
    class Config (line 53) | class Config:
  class GraphSearchResultType (line 71) | class GraphSearchResultType(str, Enum):
  class GraphEntityResult (line 77) | class GraphEntityResult(R2RSerializable):
    class Config (line 83) | class Config:
  class GraphRelationshipResult (line 93) | class GraphRelationshipResult(R2RSerializable):
    class Config (line 104) | class Config:
    method __str__ (line 113) | def __str__(self) -> str:
  class GraphCommunityResult (line 117) | class GraphCommunityResult(R2RSerializable):
    class Config (line 123) | class Config:
    method __str__ (line 134) | def __str__(self) -> str:
  class GraphSearchResult (line 140) | class GraphSearchResult(R2RSerializable):
    method __str__ (line 148) | def __str__(self) -> str:
    class Config (line 151) | class Config:
  class WebPageSearchResult (line 170) | class WebPageSearchResult(R2RSerializable):
    class Config (line 180) | class Config:
    method __str__ (line 197) | def __str__(self) -> str:
  class RelatedSearchResult (line 201) | class RelatedSearchResult(R2RSerializable):
  class PeopleAlsoAskResult (line 207) | class PeopleAlsoAskResult(R2RSerializable):
  class WebSearchResult (line 216) | class WebSearchResult(R2RSerializable):
    method from_serper_results (line 222) | def from_serper_results(cls, results: list[dict]) -> "WebSearchResult":
  class AggregateSearchResult (line 255) | class AggregateSearchResult(R2RSerializable):
    method __str__ (line 267) | def __str__(self) -> str:
    method as_dict (line 275) | def as_dict(self) -> dict:
    class Config (line 309) | class Config:
  class HybridSearchSettings (line 392) | class HybridSearchSettings(R2RSerializable):
  class ChunkSearchSettings (line 410) | class ChunkSearchSettings(R2RSerializable):
  class GraphSearchSettings (line 431) | class GraphSearchSettings(R2RSerializable):
  class SearchSettings (line 443) | class SearchSettings(R2RSerializable):
    class Config (line 525) | class Config:
    method __init__ (line 564) | def __init__(self, **data):
    method model_dump (line 572) | def model_dump(self, *args, **kwargs):
    method get_default (line 576) | def get_default(cls, mode: str) -> "SearchSettings":
  class SearchMode (line 601) | class SearchMode(str, Enum):
  function select_search_filters (line 609) | def select_search_filters(

FILE: py/shared/abstractions/tool.py
  class Tool (line 6) | class Tool(R2RSerializable):
    class Config (line 15) | class Config:
    method set_context (line 19) | def set_context(self, context: Any) -> None:
    method execute (line 23) | async def execute(self, *args, **kwargs):
  class ToolResult (line 37) | class ToolResult(R2RSerializable):

FILE: py/shared/abstractions/user.py
  class Collection (line 12) | class Collection(BaseModel):
    class Config (line 23) | class Config:
    method __init__ (line 27) | def __init__(self, **data):
  class Token (line 33) | class Token(BaseModel):
  class TokenData (line 38) | class TokenData(BaseModel):
  class User (line 44) | class User(R2RSerializable):

FILE: py/shared/abstractions/vector.py
  class VectorType (line 12) | class VectorType(str, Enum):
  class IndexMethod (line 16) | class IndexMethod(str, Enum):
    method __str__ (line 32) | def __str__(self) -> str:
  class IndexMeasure (line 36) | class IndexMeasure(str, Enum):
    method __str__ (line 53) | def __str__(self) -> str:
    method ops (line 57) | def ops(self) -> str:
    method pgvector_repr (line 68) | def pgvector_repr(self) -> str:
  class IndexArgsIVFFlat (line 79) | class IndexArgsIVFFlat(R2RSerializable):
  class IndexArgsHNSW (line 90) | class IndexArgsHNSW(R2RSerializable):
  class VectorTableName (line 109) | class VectorTableName(str, Enum):
    method __str__ (line 119) | def __str__(self) -> str:
  class VectorQuantizationType (line 123) | class VectorQuantizationType(str, Enum):
    method __str__ (line 138) | def __str__(self) -> str:
    method db_type (line 142) | def db_type(self) -> str:
  class VectorQuantizationSettings (line 152) | class VectorQuantizationSettings(R2RSerializable):
  class Vector (line 158) | class Vector(R2RSerializable):
    method __init__ (line 165) | def __init__(self, **data):
    method __repr__ (line 176) | def __repr__(self) -> str:
  class VectorEntry (line 182) | class VectorEntry(R2RSerializable):
    method __str__ (line 194) | def __str__(self) -> str:
    method __repr__ (line 207) | def __repr__(self) -> str:
  class StorageResult (line 212) | class StorageResult(R2RSerializable):
    method __str__ (line 220) | def __str__(self) -> str:
    method __repr__ (line 224) | def __repr__(self) -> str:
  class IndexConfig (line 229) | class IndexConfig(BaseModel):

FILE: py/shared/api/models/auth/responses.py
  class TokenResponse (line 7) | class TokenResponse(BaseModel):

FILE: py/shared/api/models/base.py
  class R2RResults (line 8) | class R2RResults(BaseModel, Generic[T]):
  class PaginatedR2RResult (line 12) | class PaginatedR2RResult(BaseModel, Generic[T]):
  class GenericBooleanResponse (line 17) | class GenericBooleanResponse(BaseModel):
  class GenericMessageResponse (line 21) | class GenericMessageResponse(BaseModel):

FILE: py/shared/api/models/graph/responses.py
  class GraphResponse (line 18) | class GraphResponse(BaseModel):

FILE: py/shared/api/models/ingestion/responses.py
  class IngestionResponse (line 11) | class IngestionResponse(BaseModel):
    class Config (line 25) | class Config:
  class UpdateResponse (line 35) | class UpdateResponse(BaseModel):
    class Config (line 49) | class Config:
  class VectorIndexResponse (line 59) | class VectorIndexResponse(BaseModel):
  class VectorIndicesResponse (line 63) | class VectorIndicesResponse(BaseModel):

FILE: py/shared/api/models/management/responses.py
  class PromptResponse (line 13) | class PromptResponse(BaseModel):
  class ServerStats (line 22) | class ServerStats(BaseModel):
  class SettingsResponse (line 29) | class SettingsResponse(BaseModel):
  class ChunkResponse (line 36) | class ChunkResponse(BaseModel):
  class CollectionResponse (line 46) | class CollectionResponse(BaseModel):
  class ConversationResponse (line 59) | class ConversationResponse(BaseModel):
  class MessageResponse (line 66) | class MessageResponse(BaseModel):
  class ApiKey (line 72) | class ApiKey(BaseModel):
  class ApiKeyNoPriv (line 79) | class ApiKeyNoPriv(BaseModel):
  class LoginResponse (line 87) | class LoginResponse(BaseModel):
  class UsageLimit (line 92) | class UsageLimit(BaseModel):
  class StorageTypeLimit (line 98) | class StorageTypeLimit(BaseModel):
  class StorageLimits (line 104) | class StorageLimits(BaseModel):
  class RouteUsage (line 110) | class RouteUsage(BaseModel):
  class Usage (line 115) | class Usage(BaseModel):
  class SystemDefaults (line 121) | class SystemDefaults(BaseModel):
  class LimitsResponse (line 127) | class LimitsResponse(BaseModel):

FILE: py/shared/api/models/retrieval/responses.py
  class CitationSpan (line 19) | class CitationSpan(R2RSerializable):
  class Citation (line 36) | class Citation(R2RSerializable):
    class Config (line 81) | class Config:
  class RAGResponse (line 138) | class RAGResponse(R2RSerializable):
    class Config (line 159) | class Config:
  class AgentResponse (line 257) | class AgentResponse(R2RSerializable):
    class Config (line 263) | class Config:
  class DocumentSearchResult (line 405) | class DocumentSearchResult(BaseModel):
  class SSEEventBase (line 421) | class SSEEventBase(BaseModel):
  class SearchResultsData (line 427) | class SearchResultsData(BaseModel):
  class SearchResultsEvent (line 433) | class SearchResultsEvent(SSEEventBase):
  class DeltaPayload (line 438) | class DeltaPayload(BaseModel):
  class MessageDelta (line 444) | class MessageDelta(BaseModel):
  class Delta (line 449) | class Delta(BaseModel):
  class MessageData (line 453) | class MessageData(BaseModel):
  class MessageEvent (line 459) | class MessageEvent(SSEEventBase):
  class CitationSpanData (line 465) | class CitationSpanData(BaseModel):
  class CitationData (line 479) | class CitationData(BaseModel):
    class Config (line 508) | class Config:
  class CitationEvent (line 514) | class CitationEvent(SSEEventBase):
  class FinalAnswerData (line 520) | class FinalAnswerData(BaseModel):
  class FinalAnswerEvent (line 525) | class FinalAnswerEvent(SSEEventBase):
  class ToolCallData (line 531) | class ToolCallData(BaseModel):
  class ToolCallEvent (line 537) | class ToolCallEvent(SSEEventBase):
  class ToolResultData (line 543) | class ToolResultData(BaseModel):
  class ToolResultEvent (line 549) | class ToolResultEvent(SSEEventBase):
  class UnknownEvent (line 555) | class UnknownEvent(SSEEventBase):
  class ThinkingData (line 560) | class ThinkingData(BaseModel):
  class ThinkingEvent (line 566) | class ThinkingEvent(SSEEventBase):

FILE: py/shared/utils/base_utils.py
  function id_to_shorthand (line 25) | def id_to_shorthand(id: str | UUID):
  function format_search_results_for_llm (line 29) | def format_search_results_for_llm(
  function _generate_id_from_label (line 137) | def _generate_id_from_label(label) -> UUID:
  function generate_id (line 141) | def generate_id(label: Optional[str] = None) -> UUID:
  function generate_document_id (line 148) | def generate_document_id(filename: str, user_id: UUID) -> UUID:
  function generate_extraction_id (line 154) | def generate_extraction_id(
  function generate_default_user_collection_id (line 162) | def generate_default_user_collection_id(user_id: UUID) -> UUID:
  function generate_user_id (line 167) | def generate_user_id(email: str) -> UUID:
  function generate_default_prompt_id (line 172) | def generate_default_prompt_id(prompt_name: str) -> UUID:
  function generate_entity_document_id (line 177) | def generate_entity_document_id() -> UUID:
  function validate_uuid (line 183) | def validate_uuid(uuid_str: str) -> UUID:
  function update_settings_from_dict (line 187) | def update_settings_from_dict(server_settings, settings_dict: dict):
  function _decorate_vector_type (line 204) | def _decorate_vector_type(
  function _get_vector_column_str (line 211) | def _get_vector_column_str(
  function deep_update (line 229) | def deep_update(
  function tokens_count_for_message (line 250) | def tokens_count_for_message(message, encoding):
  function num_tokens_from_messages (line 272) | def num_tokens_from_messages(messages, model="gpt-4.1"):
  class SearchResultsCollector (line 288) | class SearchResultsCollector:
    method __init__ (line 294) | def __init__(self):
    method results (line 299) | def results(self):
    method results (line 304) | def results(self, value):
    method add_aggregate_result (line 329) | def add_aggregate_result(self, agg):
    method add_result (line 378) | def add_result(self, result_obj, source_type=None):
    method _detect_result_type (line 391) | def _detect_result_type(self, obj):
    method find_by_short_id (line 485) | def find_by_short_id(self, short_id):
    method get_results_by_type (line 539) | def get_results_by_type(self, type_name):
    method __repr__ (line 547) | def __repr__(self):
    method get_all_results (line 555) | def get_all_results(self) -> list[Tuple[str, Any]]:
  function convert_nonserializable_objects (line 563) | def convert_nonserializable_objects(obj):
  function dump_obj (line 592) | def dump_obj(obj) -> list[dict[str, Any]]:
  function dump_collector (line 606) | def dump_collector(collector: SearchResultsCollector) -> list[dict[str, ...
  function num_tokens (line 637) | def num_tokens(text, model="gpt-4o"):
  class CombinedMeta (line 647) | class CombinedMeta(AsyncSyncMeta, ABCMeta):
  function yield_sse_event (line 651) | async def yield_sse_event(event_name: str, payload: dict, chunk_size=1024):
  class SSEFormatter (line 675) | class SSEFormatter:
    method yield_citation_event (line 682) | async def yield_citation_event(
    method yield_final_answer_event (line 708) | async def yield_final_answer_event(
    method yield_message_event (line 717) | async def yield_message_event(text_segment, msg_id=None):
    method yield_thinking_event (line 738) | async def yield_thinking_event(text_segment, thinking_id=None):
    method yield_done_event (line 759) | def yield_done_event():
    method yield_error_event (line 763) | async def yield_error_event(error_message, error_id=None):
    method yield_tool_call_event (line 774) | async def yield_tool_call_event(tool_call_data):
    method yield_search_results_event (line 785) | async def yield_search_results_event(aggregated_results):
    method yield_tool_result_event (line 795) | async def yield_tool_result_event(tool_result_data):

FILE: py/shared/utils/splitter/text.py
  class BaseSerialized (line 58) | class BaseSerialized(TypedDict):
  class SerializedConstructor (line 67) | class SerializedConstructor(BaseSerialized):
  class SerializedSecret (line 74) | class SerializedSecret(BaseSerialized):
  class SerializedNotImplemented (line 80) | class SerializedNotImplemented(BaseSerialized):
  function try_neq_default (line 87) | def try_neq_default(value: Any, key: str, model: BaseModel) -> bool:
  class Serializable (line 104) | class Serializable(BaseModel, ABC):
    method is_lc_serializable (line 108) | def is_lc_serializable(cls) -> bool:
    method get_lc_namespace (line 113) | def get_lc_namespace(cls) -> list[str]:
    method lc_secrets (line 122) | def lc_secrets(self) -> dict[str, str]:
    method lc_attributes (line 130) | def lc_attributes(self) -> dict:
    method lc_id (line 139) | def lc_id(cls) -> list[str]:
    class Config (line 147) | class Config:
    method __repr_args__ (line 150) | def __repr_args__(self) -> Any:
    method __init__ (line 159) | def __init__(self, **kwargs: Any) -> None:
    method to_json (line 163) | def to_json(
    method to_json_not_implemented (line 231) | def to_json_not_implemented(self) -> SerializedNotImplemented:
  function _replace_secrets (line 235) | def _replace_secrets(
  function to_json_not_implemented (line 256) | def to_json_not_implemented(obj: object) -> SerializedNotImplemented:
  class SplitterDocument (line 290) | class SplitterDocument(Serializable):
    method __init__ (line 300) | def __init__(self, page_content: str, **kwargs: Any) -> None:
    method is_lc_serializable (line 305) | def is_lc_serializable(cls) -> bool:
    method get_lc_namespace (line 310) | def get_lc_namespace(cls) -> list[str]:
  class BaseDocumentTransformer (line 315) | class BaseDocumentTransformer(ABC):
    method transform_documents (line 351) | def transform_documents(
    method atransform_documents (line 363) | async def atransform_documents(
  function _make_spacy_pipe_for_splitting (line 380) | def _make_spacy_pipe_for_splitting(
  function _split_text_with_regex (line 400) | def _split_text_with_regex(
  class TextSplitter (line 421) | class TextSplitter(BaseDocumentTransformer, ABC):
    method __init__ (line 424) | def __init__(
    method split_text (line 458) | def split_text(self, text: str) -> list[str]:
    method create_documents (line 461) | def create_documents(
    method split_documents (line 483) | def split_documents(
    method _join_docs (line 493) | def _join_docs(self, docs: list[str], separator: str) -> Optional[str]:
    method _merge_splits (line 502) | def _merge_splits(
    method from_huggingface_tokenizer (line 549) | def from_huggingface_tokenizer(
    method from_tiktoken_encoder (line 572) | def from_tiktoken_encoder(
    method transform_documents (line 613) | def transform_documents(
  class CharacterTextSplitter (line 620) | class CharacterTextSplitter(TextSplitter):
    method __init__ (line 625) | def __init__(
    method split_text (line 636) | def split_text(self, text: str) -> list[str]:
  class LineType (line 649) | class LineType(TypedDict):
  class HeaderType (line 656) | class HeaderType(TypedDict):
  class MarkdownHeaderTextSplitter (line 664) | class MarkdownHeaderTextSplitter:
    method __init__ (line 667) | def __init__(
    method aggregate_lines_to_chunks (line 690) | def aggregate_lines_to_chunks(
    method split_text (line 737) | def split_text(self, text: str) -> list[SplitterDocument]:
  class ElementType (line 868) | class ElementType(TypedDict):
  class HTMLHeaderTextSplitter (line 877) | class HTMLHeaderTextSplitter:
    method __init__ (line 883) | def __init__(
    method aggregate_elements_to_chunks (line 901) | def aggregate_elements_to_chunks(
    method split_text_from_url (line 932) | def split_text_from_url(self, url: str) -> list[SplitterDocument]:
    method split_text (line 941) | def split_text(self, text: str) -> list[SplitterDocument]:
    method split_text_from_file (line 949) | def split_text_from_file(self, file: Any) -> list[SplitterDocument]:
  class Tokenizer (line 1039) | class Tokenizer:
  function split_text_on_tokens (line 1052) | def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> list[str]:
  class TokenTextSplitter (line 1069) | class TokenTextSplitter(TextSplitter):
    method __init__ (line 1072) | def __init__(
    method split_text (line 1099) | def split_text(self, text: str) -> list[str]:
  class SentenceTransformersTokenTextSplitter (line 1117) | class SentenceTransformersTokenTextSplitter(TextSplitter):
    method __init__ (line 1120) | def __init__(
    method _initialize_chunk_configuration (line 1146) | def _initialize_chunk_configuration(
    method split_text (line 1164) | def split_text(self, text: str) -> list[str]:
    method count_tokens (line 1177) | def count_tokens(self, *, text: str) -> int:
    method _encode (line 1182) | def _encode(self, text: str) -> list[int]:
  class Language (line 1191) | class Language(str, Enum):
  class RecursiveCharacterTextSplitter (line 1219) | class RecursiveCharacterTextSplitter(TextSplitter):
    method __init__ (line 1225) | def __init__(
    method _split_text (line 1246) | def _split_text(self, text: str, separators: list[str]) -> list[str]:
    method split_text (line 1288) | def split_text(self, text: str) -> list[str]:
    method from_language (line 1292) | def from_language(
    method get_separators_for_language (line 1299) | def get_separators_for_language(language: Language) -> list[str]:
  class NLTKTextSplitter (line 1765) | class NLTKTextSplitter(TextSplitter):
    method __init__ (line 1768) | def __init__(
    method split_text (line 1783) | def split_text(self, text: str) -> list[str]:
  class SpacyTextSplitter (line 1790) | class SpacyTextSplitter(TextSplitter):
    method __init__ (line 1799) | def __init__(
    method split_text (line 1813) | def split_text(self, text: str) -> list[str]:
  class KonlpyTextSplitter (line 1819) | class KonlpyTextSplitter(TextSplitter):
    method __init__ (line 1825) | def __init__(
    method split_text (line 1842) | def split_text(self, text: str) -> list[str]:
  class PythonCodeTextSplitter (line 1849) | class PythonCodeTextSplitter(RecursiveCharacterTextSplitter):
    method __init__ (line 1852) | def __init__(self, **kwargs: Any) -> None:
  class MarkdownTextSplitter (line 1858) | class MarkdownTextSplitter(RecursiveCharacterTextSplitter):
    method __init__ (line 1861) | def __init__(self, **kwargs: Any) -> None:
  class LatexTextSplitter (line 1867) | class LatexTextSplitter(RecursiveCharacterTextSplitter):
    method __init__ (line 1870) | def __init__(self, **kwargs: Any) -> None:
  class RecursiveJsonSplitter (line 1876) | class RecursiveJsonSplitter:
    method __init__ (line 1877) | def __init__(
    method _json_size (line 1889) | def _json_size(data: dict) -> int:
    method _set_nested_dict (line 1894) | def _set_nested_dict(d: dict, path: list[str], value: Any) -> None:
    method _list_to_dict_preprocessing (line 1900) | def _list_to_dict_preprocessing(self, data: Any) -> Any:
    method _json_split (line 1916) | def _json_split(
    method split_json (line 1951) | def split_json(
    method split_text (line 1970) | def split_text(
    method create_documents (line 1982) | def create_documents(

FILE: py/tests/integration/conftest.py
  class RetryableR2RAsyncClient (line 11) | class RetryableR2RAsyncClient(R2RAsyncClient):
    method _make_request (line 14) | async def _make_request(self, method, endpoint, version="v3", **kwargs):
  class RetryableR2RClient (line 36) | class RetryableR2RClient(R2RClient):
    method _make_request (line 39) | def _make_request(self, method, endpoint, version="v3", **kwargs):
  class TestConfig (line 63) | class TestConfig:
    method __init__ (line 64) | def __init__(self):
  function config (line 75) | def config() -> TestConfig:
  function client (line 80) | async def client(config) -> AsyncGenerator[R2RClient, None]:
  function mutable_client (line 86) | def mutable_client(config) -> R2RClient:
  function aclient (line 92) | async def aclient(config) -> AsyncGenerator[R2RAsyncClient, None]:
  function superuser_client (line 98) | async def superuser_client(
  function test_document (line 108) | def test_document(client: R2RClient):
  function test_collection (line 125) | def test_collection(client: R2RClient, test_document):

FILE: py/tests/integration/test_agent.py
  function test_agent_basic_response (line 5) | def test_agent_basic_response(client, test_collection):
  function test_agent_conversation_memory (line 14) | def test_agent_conversation_memory(client, test_collection):
  function test_agent_rag_tool_usage (line 35) | def test_agent_rag_tool_usage(client, test_collection):
  function test_agent_rag_tool_usage2 (line 56) | def test_agent_rag_tool_usage2(client, test_collection):
  function test_research_agent_client (line 112) | def test_research_agent_client(client):
  function test_agent_respects_max_tokens (line 122) | def test_agent_respects_max_tokens(client, test_collection):
  function test_agent_model_selection (line 142) | def test_agent_model_selection(client, test_collection):
  function test_agent_response_timing (line 159) | def test_agent_response_timing(client, test_collection):
  function test_agent_handles_large_context (line 173) | def test_agent_handles_large_context(client):

FILE: py/tests/integration/test_base.py
  class BaseTest (line 6) | class BaseTest:
    method cleanup_resource (line 10) | async def cleanup_resource(cleanup_func,

FILE: py/tests/integration/test_chunks.py
  class AsyncR2RTestClient (line 11) | class AsyncR2RTestClient:
    method __init__ (line 14) | def __init__(self, base_url: str = "http://localhost:7272"):
    method create_document (line 17) | async def create_document(self,
    method delete_document (line 24) | async def delete_document(self, doc_id: str):
    method list_chunks (line 27) | async def list_chunks(self, doc_id: str):
    method retrieve_chunk (line 31) | async def retrieve_chunk(self, chunk_id: str):
    method update_chunk (line 35) | async def update_chunk(self,
    method delete_chunk (line 46) | async def delete_chunk(self, chunk_id: str):
    method search_chunks (line 50) | async def search_chunks(self, query: str, limit: int = 5):
    method register_user (line 55) | async def register_user(self, email: str, password: str):
    method login_user (line 58) | async def login_user(self, email: str, password: str):
    method logout_user (line 61) | async def logout_user(self):
  function test_client (line 66) | async def test_client() -> AsyncGenerator[AsyncR2RTestClient, None]:
  function test_document (line 72) | async def test_document(
  class TestChunks (line 87) | class TestChunks:
    method test_create_and_list_chunks (line 90) | async def test_create_and_list_chunks(self,
    method test_retrieve_chunk (line 104) | async def test_retrieve_chunk(self, test_client: AsyncR2RTestClient,
    method test_update_chunk (line 115) | async def test_update_chunk(self, test_client: AsyncR2RTestClient,
    method test_delete_chunk (line 127) | async def test_delete_chunk(self, test_client: AsyncR2RTestClient,
    method test_search_chunks (line 142) | async def test_search_chunks(self, test_client: AsyncR2RTestClient,
    method test_unauthorized_chunk_access (line 159) | async def test_unauthorized_chunk_access(self,
    method test_list_chunks_with_filters (line 177) | async def test_list_chunks_with_filters(self,
    method test_list_chunks_pagination (line 193) | async def test_list_chunks_pagination(self,
    method test_list_chunks_with_multiple_documents (line 236) | async def test_list_chunks_with_multiple_documents(
  function cleanup_documents (line 278) | async def cleanup_documents(test_client: AsyncR2RTestClient):

FILE: py/tests/integration/test_collections.py
  function test_document_2 (line 9) | def test_document_2(client: R2RClient):
  function test_create_collection (line 24) | def test_create_collection(client: R2RClient):
  function test_list_collections (line 33) | def test_list_collections(client: R2RClient, test_collection):
  function test_retrieve_collection (line 38) | def test_retrieve_collection(client: R2RClient, test_collection):
  function test_update_collection (line 46) | def test_update_collection(client: R2RClient, test_collection):
  function test_add_document_to_collection (line 59) | def test_add_document_to_collection(client: R2RClient, test_collection,
  function test_list_documents_in_collection (line 70) | def test_list_documents_in_collection(client: R2RClient, test_collection,
  function test_remove_document_from_collection (line 80) | def test_remove_document_from_collection(client: R2RClient, test_collect...
  function test_remove_non_member_user_from_collection (line 91) | def test_remove_non_member_user_from_collection(mutable_client: R2RClient):
  function test_delete_collection (line 126) | def test_delete_collection(client: R2RClient):
  function test_add_user_to_non_existent_collection (line 138) | def test_add_user_to_non_existent_collection(mutable_client: R2RClient):
  function test_create_collection_without_name (line 157) | def test_create_collection_without_name(client: R2RClient):
  function test_filter_collections_by_non_existent_id (line 169) | def test_filter_collections_by_non_existent_id(client: R2RClient):
  function test_list_documents_in_empty_collection (line 177) | def test_list_documents_in_empty_collection(client: R2RClient):
  function test_remove_document_not_in_collection (line 187) | def test_remove_document_not_in_collection(client: R2RClient, test_docum...
  function test_add_non_existent_document_to_collection (line 202) | def test_add_non_existent_document_to_collection(client: R2RClient):
  function test_delete_non_existent_collection (line 217) | def test_delete_non_existent_collection(client: R2RClient):
  function test_retrieve_collection_by_name (line 226) | def test_retrieve_collection_by_name(client: R2RClient):

FILE: py/tests/integration/test_collections_users_interaction.py
  function normal_user_client (line 16) | def normal_user_client(mutable_client: R2RClient):
  function another_normal_user_client (line 37) | def another_normal_user_client(config):
  function user_owned_collection (line 56) | def user_owned_collection(normal_user_client: R2RClient):
  function superuser_owned_collection (line 72) | def superuser_owned_collection(client: R2RClient):
  function test_non_member_cannot_view_collection (line 86) | def test_non_member_cannot_view_collection(normal_user_client,
  function test_collection_owner_can_view_collection (line 97) | def test_collection_owner_can_view_collection(normal_user_client: R2RCli...
  function test_collection_member_can_view_collection (line 106) | def test_collection_member_can_view_collection(client,
  function test_non_owner_member_cannot_edit_collection (line 134) | def test_non_owner_member_cannot_edit_collection(
  function test_non_owner_member_cannot_delete_collection (line 154) | def test_non_owner_member_cannot_delete_collection(
  function test_non_owner_member_cannot_add_other_users (line 173) | def test_non_owner_member_cannot_add_other_users(
  function test_owner_can_remove_member_from_collection (line 207) | def test_owner_can_remove_member_from_collection(
  function test_superuser_can_access_any_collection (line 230) | def test_superuser_can_access_any_collection(client: R2RClient,
  function test_unauthenticated_cannot_access_collections (line 245) | def test_unauthenticated_cannot_access_collections(config,
  function test_user_cannot_add_document_to_collection_they_cannot_edit (line 260) | def test_user_cannot_add_document_to_collection_they_cannot_edit(
  function test_user_cannot_remove_document_from_collection_they_cannot_edit (line 310) | def test_user_cannot_remove_document_from_collection_they_cannot_edit(
  function test_normal_user_cannot_make_another_user_superuser (line 346) | def test_normal_user_cannot_make_another_user_superuser(
  function test_normal_user_cannot_view_other_users_if_not_superuser (line 361) | def test_normal_user_cannot_view_other_users_if_not_superuser(
  function test_normal_user_cannot_update_other_users_details (line 370) | def test_normal_user_cannot_update_other_users_details(
  function test_owner_cannot_promote_member_to_superuser_via_collection (line 393) | def test_owner_cannot_promote_member_to_superuser_via_collection(
  function test_member_cannot_view_other_users_info (line 412) | def test_member_cannot_view_other_users_info(
  function test_unauthenticated_user_cannot_join_collection (line 432) | def test_unauthenticated_user_cannot_join_collection(config,
  function test_non_owner_cannot_remove_users_they_did_not_add (line 451) | def test_non_owner_cannot_remove_users_they_did_not_add(
  function test_owner_cannot_access_deleted_member_info_after_removal (line 471) | def test_owner_cannot_access_deleted_member_info_after_removal(
  function test_member_cannot_add_document_to_non_existent_collection (line 498) | def test_member_cannot_add_document_to_non_existent_collection(

FILE: py/tests/integration/test_conversations.py
  function test_conversation (line 11) | def test_conversation(client: R2RClient):
  function test_create_conversation (line 20) | def test_create_conversation(client: R2RClient):
  function test_list_conversations (line 27) | def test_list_conversations(client: R2RClient, test_conversation):
  function test_retrieve_conversation (line 33) | def test_retrieve_conversation(client: R2RClient, test_conversation):
  function test_delete_conversation (line 42) | def test_delete_conversation(client: R2RClient):
  function test_add_message (line 54) | def test_add_message(client: R2RClient, test_conversation):
  function test_retrieve_non_existent_conversation (line 69) | def test_retrieve_non_existent_conversation(client: R2RClient):
  function test_delete_non_existent_conversation (line 77) | def test_delete_non_existent_conversation(client: R2RClient):
  function test_add_message_to_non_existent_conversation (line 85) | def test_add_message_to_non_existent_conversation(client: R2RClient):
  function test_update_message (line 98) | def test_update_message(client: R2RClient, test_conversation):
  function test_update_non_existent_message (line 130) | def test_update_non_existent_message(client: R2RClient, test_conversation):
  function test_add_message_with_empty_content (line 140) | def test_add_message_with_empty_content(client: R2RClient, test_conversa...
  function test_add_message_invalid_role (line 152) | def test_add_message_invalid_role(client: R2RClient, test_conversation):
  function test_add_message_to_deleted_conversation (line 163) | def test_add_message_to_deleted_conversation(client: R2RClient):
  function test_update_message_with_additional_metadata (line 179) | def test_update_message_with_additional_metadata(client: R2RClient,
  function test_new_conversation_gets_named_after_first_agent_interaction (line 223) | def test_new_conversation_gets_named_after_first_agent_interaction(clien...
  function test_existing_named_conversation_preserves_name_after_agent_interaction (line 262) | def test_existing_named_conversation_preserves_name_after_agent_interact...

FILE: py/tests/integration/test_documents.py
  function cleanup_documents (line 10) | def cleanup_documents(client: R2RClient):
  function test_create_document_with_file (line 27) | def test_create_document_with_file(client: R2RClient, cleanup_documents):
  function test_create_document_with_raw_text (line 37) | def test_create_document_with_raw_text(client: R2RClient, cleanup_docume...
  function test_create_document_with_chunks (line 52) | def test_create_document_with_chunks(client: R2RClient, cleanup_documents):
  function test_create_document_different_modes (line 69) | def test_create_document_different_modes(client: R2RClient, cleanup_docu...
  function test_list_documents (line 89) | def test_list_documents(client: R2RClient, test_document):
  function test_retrieve_document (line 96) | def test_retrieve_document(client: R2RClient, test_document):
  function test_download_document (line 101) | def test_download_document(client: R2RClient, test_document):
  function test_delete_document (line 109) | def test_delete_document(client: R2RClient):
  function test_delete_document_by_filter (line 122) | def test_delete_document_by_filter(client: R2RClient):
  function test_list_document_collections (line 144) | def test_list_document_collections(client: R2RClient, test_document):
  function test_extract_document (line 154) | def test_extract_document(client: R2RClient, test_document):
  function test_list_entities (line 162) | def test_list_entities(client: R2RClient, test_document):
  function test_list_relationships (line 173) | def test_list_relationships(client: R2RClient, test_document):
  function test_search_documents (line 183) | def test_search_documents(client: R2RClient, test_document):
  function test_list_document_chunks (line 196) | def test_list_document_chunks(mutable_client: R2RClient, cleanup_documen...
  function test_search_documents_extended (line 210) | def test_search_documents_extended(client: R2RClient, cleanup_documents):
  function test_retrieve_document_not_found (line 228) | def test_retrieve_document_not_found(client):
  function test_delete_document_non_existent (line 235) | def test_delete_document_non_existent(client):
  function test_get_document_collections_non_superuser (line 244) | def test_get_document_collections_non_superuser(client):
  function test_access_document_not_owned (line 258) | def test_access_document_not_owned(client: R2RClient, cleanup_documents):
  function test_list_documents_with_pagination (line 277) | def test_list_documents_with_pagination(mutable_client: R2RClient,
  function test_ingest_invalid_chunks (line 294) | def test_ingest_invalid_chunks(client):
  function test_ingest_too_many_chunks (line 305) | def test_ingest_too_many_chunks(client: R2RClient):
  function test_chunk_size_and_overlap (line 313) | def test_chunk_size_and_overlap(client: R2RClient, cleanup_documents):
  function test_delete_by_complex_filter (line 355) | def test_delete_by_complex_filter(client: R2RClient, cleanup_documents):
  function test_search_documents_no_match (line 385) | def test_search_documents_no_match(client: R2RClient, cleanup_documents):
  function test_delete_by_workflow_metadata (line 415) | def test_delete_by_workflow_metadata(client: R2RClient, cleanup_documents):
  function test_delete_by_classification_metadata (line 496) | def test_delete_by_classification_metadata(client: R2RClient,
  function test_delete_by_version_metadata (line 560) | def test_delete_by_version_metadata(client: R2RClient, cleanup_documents):

FILE: py/tests/integration/test_filters.py
  function setup_docs_with_collections (line 9) | def setup_docs_with_collections(client: R2RClient):
  function test_collection_id_eq_filter (line 60) | def test_collection_id_eq_filter(client: R2RClient,
  function test_collection_id_ne_filter (line 79) | def test_collection_id_ne_filter(client: R2RClient,
  function test_collection_id_in_filter (line 101) | def test_collection_id_in_filter(client: R2RClient,
  function test_collection_id_nin_filter (line 123) | def test_collection_id_nin_filter(client: R2RClient,
  function test_collections_id_contains_filter (line 145) | def test_collections_id_contains_filter(client: R2RClient,
  function test_collection_id_contains_multiple (line 166) | def test_collection_id_contains_multiple(client: R2RClient,
  function test_delete_by_collection_id_eq (line 188) | def test_delete_by_collection_id_eq(client: R2RClient,

FILE: py/tests/integration/test_graphs.py
  function config (line 9) | def config():
  function client (line 20) | def client(config):
  function test_collection (line 28) | def test_collection(client):
  function test_list_graphs (line 42) | def test_list_graphs(client: R2RClient):
  function test_create_and_get_graph (line 47) | def test_create_and_get_graph(client: R2RClient, test_collection):
  function test_update_graph (line 54) | def test_update_graph(client: R2RClient, test_collection):
  function test_list_entities (line 68) | def test_list_entities(client: R2RClient, test_collection):
  function test_create_and_get_entity (line 75) | def test_create_and_get_entity(client: R2RClient, test_collection):
  function test_list_relationships (line 92) | def test_list_relationships(client: R2RClient, test_collection):
  function test_create_and_get_relationship (line 99) | def test_create_and_get_relationship(client: R2RClient, test_collection):
  function test_list_communities (line 174) | def test_list_communities(client: R2RClient, test_collection):
  function test_create_and_get_community (line 181) | def test_create_and_get_community(client: R2RClient, test_collection):
  function test_update_community (line 200) | def test_update_community(client: R2RClient, test_collection):
  function test_pull_operation (line 225) | def test_pull_operation(client: R2RClient, test_collection):
  function test_error_handling (line 231) | def test_error_handling(client: R2RClient):

FILE: py/tests/integration/test_indices.py
  function config (line 7) | def config():
  function client (line 18) | def client(config):
  function test_list_indices (line 51) | def test_list_indices(client: R2RClient):
  function test_error_handling (line 91) | def test_error_handling(client: R2RClient):

FILE: py/tests/integration/test_ingestion.py
  function file_ingestion (line 33) | def file_ingestion(
  function config (line 130) | def config():
  function client (line 141) | def client(config):
  function test_file_type_ingestion (line 184) | def test_file_type_ingestion(client: R2RClient, file_type: str,
  function test_hires_ingestion (line 208) | def test_hires_ingestion(client: R2RClient, file_type: str, file_path: s...
  function test_ocr_ingestion (line 242) | def test_ocr_ingestion(client: R2RClient, file_type: str, file_path: str):
  function test_custom_ingestion_config (line 253) | def test_custom_ingestion_config(client: R2RClient):
  function test_raw_text_ingestion (line 280) | def test_raw_text_ingestion(client: R2RClient):
  function test_chunks_ingestion (line 307) | def test_chunks_ingestion(client: R2RClient):
  function test_metadata_handling (line 320) | def test_metadata_handling(client: R2RClient):
  function test_img_ingestion (line 351) | def test_img_ingestion(client: R2RClient):
  function test_metadata_title_handling (line 378) | def test_metadata_title_handling(client: R2RClient):

FILE: py/tests/integration/test_retrieval.py
  function config (line 10) | def config():
  function client (line 21) | def client(config):
  function test_search_basic_mode (line 28) | def test_search_basic_mode(client: R2RClient):
  function test_search_advanced_mode_with_filters (line 34) | def test_search_advanced_mode_with_filters(client: R2RClient):
  function test_search_custom_mode (line 47) | def test_search_custom_mode(client: R2RClient):
  function test_rag_query (line 59) | def test_rag_query(client: R2RClient):
  function test_rag_with_filter (line 74) | def test_rag_with_filter(client: R2RClient):
  function test_rag_stream_query (line 102) | def test_rag_stream_query(client: R2RClient):
  function test_agent_query (line 130) | def test_agent_query(client: R2RClient):
  function test_agent_query_stream (line 147) | def test_agent_query_stream(client: R2RClient):
  function test_completion (line 173) | def test_completion(client: R2RClient):
  function test_embedding (line 203) | def test_embedding(client: R2RClient):
  function test_error_handling (line 209) | def test_error_handling(client: R2RClient):
  function test_no_results_scenario (line 219) | def test_no_results_scenario(client: R2RClient):
  function test_pagination_limit_one (line 233) | def test_pagination_limit_one(client: R2RClient):
  function test_pagination_offset (line 248) | def test_pagination_offset(client: R2RClient):
  function test_rag_task_prompt (line 271) | def test_rag_task_prompt(client: R2RClient):
  function test_agent_conversation_id (line 292) | def test_agent_conversation_id(client: R2RClient):
  function test_complex_filters_and_fulltext (line 327) | def test_complex_filters_and_fulltext(client: R2RClient, test_collection):
  function test_complex_nested_filters (line 440) | def test_complex_nested_filters(client: R2RClient, test_collection):
  function test_filters_no_match (line 492) | def test_filters_no_match(client: R2RClient):
  function test_pagination_extremes (line 506) | def test_pagination_extremes(client: R2RClient):
  function test_full_text_stopwords (line 524) | def test_full_text_stopwords(client: R2RClient):
  function test_full_text_non_ascii (line 538) | def test_full_text_non_ascii(client: R2RClient):
  function test_missing_fields (line 552) | def test_missing_fields(client: R2RClient):
  function test_rag_with_large_context (line 567) | def test_rag_with_large_context(client: R2RClient):
  function test_agent_long_conversation (line 585) | def test_agent_long_conversation(client: R2RClient):
  function test_filter_by_document_type (line 638) | def test_filter_by_document_type(client: R2RClient):
  function test_search_hyde_mode (line 656) | def test_search_hyde_mode(client: R2RClient):
  function test_search_rag_fusion_mode (line 705) | def test_search_rag_fusion_mode(client: R2RClient):
  function test_rag_fusion_mode_with_subqueries (line 740) | def test_rag_fusion_mode_with_subqueries(client: R2RClient):
  function test_collection_id_filters (line 762) | def test_collection_id_filters(client: R2RClient):

FILE: py/tests/integration/test_retrieval_advanced.py
  function test_semantic_search_with_near_duplicates (line 7) | def test_semantic_search_with_near_duplicates(client: R2RClient):
  function test_semantic_search_multilingual (line 42) | def test_semantic_search_multilingual(client: R2RClient):
  function test_rag_context_window_limits (line 122) | def test_rag_context_window_limits(client: R2RClient):
  function _extract_sources (line 254) | def _extract_sources(content: str) -> list[str]:

FILE: py/tests/integration/test_users.py
  function config (line 9) | def config():
  function client (line 21) | def client(config):
  function superuser_login (line 26) | def superuser_login(client: R2RClient, config):
  function register_and_return_user_id (line 34) | def register_and_return_user_id(client: R2RClient, email: str,
  function test_register_user (line 39) | def test_register_user(client: R2RClient):
  function test_user_refresh_token (line 47) | def test_user_refresh_token(client: R2RClient):
  function test_change_password (line 59) | def test_change_password(client: R2RClient):
  function test_request_and_reset_password (line 85) | def test_request_and_reset_password(client: R2RClient):
  function test_users_list (line 111) | def test_users_list(client: R2RClient, superuser_login):
  function test_get_current_user (line 118) | def test_get_current_user(client: R2RClient, superuser_login):
  function test_get_user_by_id (line 124) | def test_get_user_by_id(client: R2RClient, superuser_login):
  function test_update_user (line 134) | def test_update_user(client: R2RClient, superuser_login):
  function test_user_collections (line 145) | def test_user_collections(client: R2RClient, superuser_login, config):
  function test_add_remove_user_from_collection (line 156) | def test_add_remove_user_from_collection(client: R2RClient, superuser_lo...
  function test_delete_user (line 185) | def test_delete_user(client: R2RClient):
  function test_superuser_downgrade_permissions (line 205) | def test_superuser_downgrade_permissions(client: R2RClient, superuser_lo...
  function test_non_owner_delete_collection (line 240) | def test_non_owner_delete_collection(client: R2RClient):
  function test_update_user_with_invalid_email (line 276) | def test_update_user_with_invalid_email(client: R2RClient, superuser_log...
  function test_update_user_email_already_exists (line 294) | def test_update_user_email_already_exists(client: R2RClient, superuser_l...
  function test_delete_user_with_incorrect_password (line 316) | def test_delete_user_with_incorrect_password(client: R2RClient):
  function test_login_with_incorrect_password (line 335) | def test_login_with_incorrect_password(client: R2RClient):
  function test_refresh_token (line 348) | def test_refresh_token(client: R2RClient):
  function test_verification_with_invalid_c

Download .json

Condensed preview — 501 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,944K chars).

[
  {
    "path": ".gitattributes",
    "chars": 92,
    "preview": "*.html linguist-documentation\n*.ipynb linguist-documentation\ntemplates/** linguist-vendored\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "chars": 834,
    "preview": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Describe the b"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/custom.md",
    "chars": 124,
    "preview": "---\nname: Custom issue template\nabout: Describe this issue template's purpose here.\ntitle: ''\nlabels: ''\nassignees: ''\n\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "chars": 595,
    "preview": "---\nname: Feature request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Is your fea"
  },
  {
    "path": ".github/actions/login-docker/action.yml",
    "chars": 449,
    "preview": "name: 'Login Docker'\ndescription: 'Sets up Docker for running R2R'\ninputs:\n  docker_username:\n    description: 'Docker H"
  },
  {
    "path": ".github/actions/setup-docker/action.yml",
    "chars": 331,
    "preview": "name: 'Setup Docker'\ndescription: 'Sets up Docker for running R2R'\nruns:\n  using: \"composite\"\n  steps:\n    - name: Set u"
  },
  {
    "path": ".github/actions/setup-postgres-ext/action.yml",
    "chars": 4240,
    "preview": "name: 'Setup PostgreSQL'\ndescription: 'Sets up PostgreSQL with pgvector'\ninputs:\n  os:\n    description: 'Operating syste"
  },
  {
    "path": ".github/actions/setup-python-full/action.yml",
    "chars": 1125,
    "preview": "name: 'Setup Python for R2R Full'\ndescription: 'Sets up Python and installs R2R dependencies for full installation'\n\ninp"
  },
  {
    "path": ".github/actions/setup-python-light/action.yml",
    "chars": 991,
    "preview": "name: 'Setup Python for R2R Light'\ndescription: 'Sets up Python environment and installs dependencies using uv'\n\ninputs:"
  },
  {
    "path": ".github/actions/start-r2r-full/action.yml",
    "chars": 560,
    "preview": "name: 'Start R2R Server'\ndescription: 'Starts the R2R server'\nruns:\n  using: \"composite\"\n  steps:\n  - name: Inspect Dock"
  },
  {
    "path": ".github/actions/start-r2r-light/action.yml",
    "chars": 469,
    "preview": "name: 'Start R2R Server'\ndescription: 'Starts the R2R server'\ninputs:\n  config-name:\n    description: 'The R2R configura"
  },
  {
    "path": ".github/workflows/build-cluster-service-docker.yml",
    "chars": 1366,
    "preview": "name: Build and Publish Cluster Service Docker Image\n\non:\n  workflow_dispatch:\n\nenv:\n  REGISTRY_BASE: ragtoriches\n\njobs:"
  },
  {
    "path": ".github/workflows/build-r2r-docker.yml",
    "chars": 3382,
    "preview": "name: Build and Publish R2R Docker Image\n\non:\n  workflow_dispatch:\n\nenv:\n  REGISTRY_IMAGE: sciphiai/r2r\n\njobs:\n  prepare"
  },
  {
    "path": ".github/workflows/build-unst-service-docker.yml",
    "chars": 1374,
    "preview": "name: Build and Publish Unstructured Service Docker Image\n\non:\n  workflow_dispatch:\n\nenv:\n  REGISTRY_BASE: ragtoriches\n\n"
  },
  {
    "path": ".github/workflows/publish-to-npm.yml",
    "chars": 713,
    "preview": "name: Publish NPM Package\n\non:\n  workflow_dispatch:\n\njobs:\n  publish:\n    runs-on: ubuntu-latest\n    defaults:\n      run"
  },
  {
    "path": ".github/workflows/publish-to-pypi.yml",
    "chars": 1672,
    "preview": "name: Publish to PyPI\n\non:\n  push:\n    branches:\n      - dev\n      - dev-minor\n  workflow_dispatch:\n\njobs:\n  publish:\n  "
  },
  {
    "path": ".github/workflows/quality.yml",
    "chars": 592,
    "preview": "name: Code Quality Checks\n\non:\n  push:\n    branches: [ '**' ]\n  pull_request:\n\njobs:\n  pre-commit:\n    runs-on: ubuntu-l"
  },
  {
    "path": ".github/workflows/r2r-full-py-integration-tests.yml",
    "chars": 2478,
    "preview": "name: R2R Full Python Integration Test (ubuntu)\n\non:\n  workflow_dispatch:\n\njobs:\n  integration-test:\n    runs-on: ubuntu"
  },
  {
    "path": ".github/workflows/r2r-js-sdk-ci.yml",
    "chars": 640,
    "preview": "name: R2R JS SDK Integration CI\n\non:\n  push:\n    branches: [main]\n    paths:\n      - 'js/sdk/**'\n  pull_request:\n    bra"
  },
  {
    "path": ".github/workflows/r2r-js-sdk-integration-tests.yml",
    "chars": 3020,
    "preview": "name: R2R JS SDK Integration Tests\n\non:\n  push:\n    branches:\n      - '**'\n\njobs:\n  setup:\n    runs-on: ubuntu-latest\n  "
  },
  {
    "path": ".github/workflows/r2r-light-py-integration-tests.yml",
    "chars": 9863,
    "preview": "name: R2R Light Python Integration Test (ubuntu)\n\non:\n  push:\n    branches:\n      - main\n    paths:\n      - 'py/**'\n    "
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 2499,
    "preview": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.0.0\n    hooks:\n      - id: trailing-whitespa"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 2071,
    "preview": "# Contributor Covenant Code of Conduct Summary\n\nTL;DR: Be nice. Be respectful. Be professional. Don't be a jerk.\n\n## Com"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 1006,
    "preview": "# R2R Contribution Guide\n\n## Quick Start\n\n- **Pre-Discussion**: Feel free to propose your ideas via issues, [Discord](ht"
  },
  {
    "path": "LICENSE.md",
    "chars": 1083,
    "preview": "The MIT License (MIT)\n\nCopyright (c) 2024 EmergentAGI Inc.\n\nPermission is hereby granted, free of charge, to any person "
  },
  {
    "path": "MANIFEST.md",
    "chars": 96,
    "preview": "# The R2R Manifest\n\nWe will do our best to build useful AI tools for developers _(before AGI)_.\n"
  },
  {
    "path": "SECURITY.md",
    "chars": 2603,
    "preview": "\n# Security Policy\n\nAt R2R, we take the security of our project and its users seriously. We appreciate the contributions"
  },
  {
    "path": "deployment/k8s/kustomizations/helm-values_hatchet.yaml",
    "chars": 5009,
    "preview": "# sharedConfig is inherited by all backend services: api, grpc, controllers, scheduler\nsharedConfig:\n  # you can disable"
  },
  {
    "path": "deployment/k8s/kustomizations/helm-values_postgresql.yaml",
    "chars": 357,
    "preview": "auth:\n  existingSecret: r2r-hatchet-secrets\n  secretKeys:\n    adminPasswordKey: HATCHET_DATABASE_POSTGRES_POSTGRES_PASSW"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-hatchet.yaml",
    "chars": 604,
    "preview": "---\n# hatchet-configmap.yaml\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: hatchet-configmap\n  annotations:\n    argoc"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-hatchet_OLD.yaml",
    "chars": 1426,
    "preview": "---\n# hatchet-configmap.yaml\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: hatchet-configmap\n  annotations:\n    argoc"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-init-scripts-hatchet.yaml",
    "chars": 10078,
    "preview": "# This file contains the initialization scripts used by the InitContainers in the Job manifests.\n\napiVersion: v1\nkind: C"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-init-scripts-r2r.yaml",
    "chars": 3268,
    "preview": "# This file contains the initialization scripts used by the InitContainers in the Job manifests.\n\napiVersion: v1\nkind: C"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-r2r.yaml",
    "chars": 1658,
    "preview": "# r2r-configmap.yaml\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: r2r-configmap\n  annotations:\n    argocd.argoproj.i"
  },
  {
    "path": "deployment/k8s/kustomizations/include/cm-unstructured.yaml",
    "chars": 327,
    "preview": "---\n# unstructured-configmap.yaml\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: unstructured-configmap\n  annotations:"
  },
  {
    "path": "deployment/k8s/kustomizations/include/hatchet-dashboard-initc.yaml",
    "chars": 1735,
    "preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n  name: hatchet-dashboard\nspec:\n  selector:\n    app: hatchet-dashboard\n  port"
  },
  {
    "path": "deployment/k8s/kustomizations/include/hatchet-engine-initc.yaml",
    "chars": 2359,
    "preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n  name: hatchet-engine\nspec:\n  selector:\n    app: hatchet-engine\n  ports:\n   "
  },
  {
    "path": "deployment/k8s/kustomizations/include/hatchet-init-job.yaml",
    "chars": 6455,
    "preview": "apiVersion: batch/v1\nkind: Job\nmetadata:\n  #generate a unique name for the job\n  #generateName: hatchet-init-job-\n  name"
  },
  {
    "path": "deployment/k8s/kustomizations/include/hatchet-rabbitmq-sts.yaml",
    "chars": 1834,
    "preview": "---\napiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: hatchet-rabbitmq\nspec:\n  serviceName: \"hatchet-rabbitmq\"\n  r"
  },
  {
    "path": "deployment/k8s/kustomizations/include/pgadmin.yaml",
    "chars": 817,
    "preview": "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: pgadmin\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      ap"
  },
  {
    "path": "deployment/k8s/kustomizations/include/pgvector-sts.yaml",
    "chars": 2491,
    "preview": "---\napiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: r2r-pgvector\nspec:\n  serviceName: \"r2r-pgvector\"\n  replicas:"
  },
  {
    "path": "deployment/k8s/kustomizations/include/r2r-dashboard-indep.yaml",
    "chars": 1467,
    "preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: r2r-dashboard\nspec:\n  replicas: 1\n  selector:\n    matchLabels"
  },
  {
    "path": "deployment/k8s/kustomizations/include/r2r-graph-clustering-indep.yaml",
    "chars": 799,
    "preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: r2r-graph-clustering\nspec:\n  replicas: 1\n  selector:\n    matc"
  },
  {
    "path": "deployment/k8s/kustomizations/include/r2r-initc.yaml",
    "chars": 4591,
    "preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: r2r\n  annotations:\n    argocd.argoproj.io/sync-wave: \"30\"\n\nsp"
  },
  {
    "path": "deployment/k8s/kustomizations/include/r2r-nginx-indep.yaml",
    "chars": 1035,
    "preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: r2r-nginx\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n  "
  },
  {
    "path": "deployment/k8s/kustomizations/include/unstructured-indep.yaml",
    "chars": 823,
    "preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: unstructured\nspec:\n  replicas: 1\n  selector:\n    matchLabels:"
  },
  {
    "path": "deployment/k8s/kustomizations/kustomization.yaml",
    "chars": 4102,
    "preview": "# kustomize build deployment/k8s/kustomizations --enable-helm > deployment/k8s/kustomizations/r2r.kustimized.yaml\n\napiVe"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/hatchet-rabbitmq-sts.yaml",
    "chars": 1024,
    "preview": "apiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: hatchet-rabbitmq\nspec:\n  volumeClaimTemplates:\n    - kind: Persi"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/rm-secret-hatchet-postgres.yaml",
    "chars": 78,
    "preview": "$patch: delete\napiVersion: v1\nkind: Secret\nmetadata:\n  name: hatchet-postgres\n"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/rm-secret-hatchet-rabbitmq-config.yaml",
    "chars": 85,
    "preview": "$patch: delete\napiVersion: v1\nkind: Secret\nmetadata:\n  name: hatchet-rabbitmq-config\n"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/rm-secret-hatchet-rabbitmq.yaml",
    "chars": 78,
    "preview": "$patch: delete\napiVersion: v1\nkind: Secret\nmetadata:\n  name: hatchet-rabbitmq\n"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/rm-secret-hatchet-shared-config.yaml",
    "chars": 83,
    "preview": "$patch: delete\napiVersion: v1\nkind: Secret\nmetadata:\n  name: hatchet-shared-config\n"
  },
  {
    "path": "deployment/k8s/kustomizations/patches/service.yaml",
    "chars": 149,
    "preview": "- op: replace\n  path: /spec/ipFamilies\n  value:\n    - IPv4\n\n- op: replace\n  path: /spec/ipFamilyPolicy\n  value:\n    Sing"
  },
  {
    "path": "deployment/k8s/manifests/examples/externalsecret_hatchet.yaml",
    "chars": 5109,
    "preview": "---\napiVersion: external-secrets.io/v1beta1\nkind: ExternalSecret\nmetadata:\n  name: hatchet-shared-config\n  annotations:\n"
  },
  {
    "path": "deployment/k8s/manifests/examples/externalsecret_r2r.yaml",
    "chars": 17714,
    "preview": "apiVersion: external-secrets.io/v1beta1\nkind: ExternalSecret\nmetadata:\n  name: r2r-secrets\n  annotations:\n    argocd.arg"
  },
  {
    "path": "deployment/k8s/manifests/examples/ingress-r2r.yaml",
    "chars": 2155,
    "preview": "# Dependancy https://external-dns.io\n# To add a DNS record for wren-ui.myhost.net host\n# Note: without authentication, e"
  },
  {
    "path": "deployment/k8s/manifests/examples/secrets_hatchet.yaml",
    "chars": 1099,
    "preview": "---\napiVersion: v1\ndata:\n  ADMIN_EMAIL: ++++++++\n  ADMIN_PASSWORD: ++++++++\n  DATABASE_POSTGRES_DB_NAME: ++++++++\n  DATA"
  },
  {
    "path": "deployment/k8s/manifests/examples/secrets_r2r.yaml",
    "chars": 959,
    "preview": "---\napiVersion: v1\ndata:\n  ANTHROPIC_API_KEY: ++++++++\n  ANYSCALE_API_KEY: ++++++++\n  AWS_ACCESS_KEY_ID: ++++++++\n  AWS_"
  },
  {
    "path": "docker/compose.full.swarm.yaml",
    "chars": 13866,
    "preview": "volumes:\n  hatchet_certs:\n    name: ${VOLUME_HATCHET_CERTS:-hatchet_certs}\n  hatchet_config:\n    name: ${VOLUME_HATCHET_"
  },
  {
    "path": "docker/compose.full.yaml",
    "chars": 5445,
    "preview": "volumes:\n  hatchet_certs:\n    name: hatchet_certs\n  hatchet_config:\n    name: hatchet_config\n  hatchet_api_key:\n    name"
  },
  {
    "path": "docker/compose.yaml",
    "chars": 1695,
    "preview": "volumes:\n  postgres_data:\n    name: postgres_data\n  minio_data:\n    name: minio_data\n\nservices:\n  postgres:\n    image: p"
  },
  {
    "path": "docker/env/hatchet.env",
    "chars": 920,
    "preview": "DATABASE_URL=\"postgres://hatchet_user:hatchet_password@hatchet-postgres:5432/hatchet?sslmode=disable\"\n\nHATCHET_CLIENT_GR"
  },
  {
    "path": "docker/env/minio.env",
    "chars": 58,
    "preview": "MINIO_ROOT_USER=minioadmin\nMINIO_ROOT_PASSWORD=minioadmin\n"
  },
  {
    "path": "docker/env/postgres.env",
    "chars": 134,
    "preview": "POSTGRES_USER=postgres\nPOSTGRES_PASSWORD=postgres\nPOSTGRES_HOST=postgres\nPOSTGRES_PORT=5432\nPOSTGRES_MAX_CONNECTIONS=102"
  },
  {
    "path": "docker/env/r2r-dashboard.env",
    "chars": 216,
    "preview": "NEXT_PUBLIC_R2R_DEPLOYMENT_URL=http://localhost:7272\nNEXT_PUBLIC_HATCHET_DASHBOARD_URL=http://localhost:7274\nNEXT_PUBLIC"
  },
  {
    "path": "docker/env/r2r-full.env",
    "chars": 1863,
    "preview": "# R2R\nR2R_PORT=7272\nR2R_HOST=0.0.0.0\nR2R_LOG_LEVEL=INFO\nR2R_CONFIG_NAME=full\nR2R_CONFIG_PATH=\nR2R_PROJECT_NAME=r2r_defau"
  },
  {
    "path": "docker/env/r2r.env",
    "chars": 1859,
    "preview": "# R2R\nR2R_PORT=7272\nR2R_HOST=0.0.0.0\nR2R_LOG_LEVEL=INFO\nR2R_CONFIG_NAME=\nR2R_CONFIG_PATH=\nR2R_PROJECT_NAME=r2r_default\nR"
  },
  {
    "path": "docker/fluent-bit/fluent-bit.conf",
    "chars": 562,
    "preview": "[SERVICE]\n    Flush        1\n    Daemon       Off\n    Log_Level    info\n    Parsers_File parsers.conf\n\n[INPUT]\n    Tag  "
  },
  {
    "path": "docker/fluent-bit/parsers.conf",
    "chars": 41,
    "preview": "[PARSER]\n    Name   json\n    Format json\n"
  },
  {
    "path": "docker/scripts/create-hatchet-db.sh",
    "chars": 717,
    "preview": "#!/bin/bash\n\nset -e\necho 'Waiting for PostgreSQL to be ready...'\nwhile ! pg_isready -h hatchet-postgres -p 5432 -U ${HAT"
  },
  {
    "path": "docker/scripts/setup-token.sh",
    "chars": 1845,
    "preview": "#!/bin/bash\n\nset -e\necho 'Starting token creation process...'\n\n# Attempt to create token and capture both stdout and std"
  },
  {
    "path": "docker/scripts/start-r2r.sh",
    "chars": 301,
    "preview": "#!/bin/bash\n\n# Check if HATCHET_CLIENT_TOKEN is set, if not read it from the API key file\nif [ -z \"${HATCHET_CLIENT_TOKE"
  },
  {
    "path": "docker/user_configs/README.md",
    "chars": 1163,
    "preview": "# User Configs Directory\n\n## Overview\nThis directory is mounted inside the R2R Docker container and is intended for cust"
  },
  {
    "path": "docker/user_tools/README.md",
    "chars": 1930,
    "preview": "# User-Defined Tools Directory\n\n## Overview\nThis directory is mounted inside the R2R Docker container and is intended fo"
  },
  {
    "path": "docker/user_tools/user_requirements.txt",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/README.md",
    "chars": 903,
    "preview": "# R2R Documentation\n\nThe most advanced AI retrieval system. Agentic Retrieval-Augmented Generation (RAG) with a RESTful "
  },
  {
    "path": "docs/cookbooks/application.md",
    "chars": 3383,
    "preview": "R2R offers an [open-source React+Next.js application](https://github.com/SciPhi-AI/R2R-Application) designed to give dev"
  },
  {
    "path": "docs/cookbooks/custom-tools.md",
    "chars": 5073,
    "preview": "There are many cases where it is helpful to define custom tools for the RAG Agent. R2R allows for users to define custom"
  },
  {
    "path": "docs/cookbooks/email.md",
    "chars": 1932,
    "preview": "Configuring your deployment to require email verification helps keep your deployment secure, prevents unauthorized accou"
  },
  {
    "path": "docs/cookbooks/evals.md",
    "chars": 9531,
    "preview": "This guide demonstrates how to evaluate your R2R RAG outputs using the Ragas evaluation framework.\n\nIn this tutorial, yo"
  },
  {
    "path": "docs/cookbooks/graphs.md",
    "chars": 7195,
    "preview": "R2R allows you to build and analyze knowledge graphs from your documents through a collection-based architecture. The sy"
  },
  {
    "path": "docs/cookbooks/ingestion.md",
    "chars": 8925,
    "preview": "R2R provides a powerful and flexible ingestion to process and manage various types of documents. It supports a wide rang"
  },
  {
    "path": "docs/cookbooks/local.md",
    "chars": 7557,
    "preview": "There are many amazing LLMs and embedding models that can be run locally. R2R fully supports using these models, giving "
  },
  {
    "path": "docs/cookbooks/logging.md",
    "chars": 3706,
    "preview": "Users deploying R2R into production settings benefit from robust, persistant logging. R2R supports this via [Victorialog"
  },
  {
    "path": "docs/cookbooks/maintenance.md",
    "chars": 6889,
    "preview": "This guide covers essential maintenance tasks for R2R deployments, with a focus on vector index management and system up"
  },
  {
    "path": "docs/cookbooks/mcp.md",
    "chars": 4402,
    "preview": "The R2R Retrieval System is a Model Context Protocol (MCP) server that enhances Claude with retrieval and search capabil"
  },
  {
    "path": "docs/cookbooks/orchestration.md",
    "chars": 2893,
    "preview": "R2R uses [Hatchet](https://docs.hatchet.run/home) for orchestrating complex workflows, particularly for ingestion and kn"
  },
  {
    "path": "docs/cookbooks/structured-output.md",
    "chars": 3327,
    "preview": "Structured outputs allow users to ensure that the retrieval response generated by the LLM follows a user-defined structu"
  },
  {
    "path": "docs/cookbooks/web-dev.md",
    "chars": 10065,
    "preview": "Web developers can easily integrate R2R into their projects using the [R2R JavaScript client](https://www.npmjs.com/pack"
  },
  {
    "path": "docs/cookbooks/{README.md}",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/README.md",
    "chars": 13084,
    "preview": "# Getting Started with R2R\n\nThis guide will walk you through setting up R2R and using its core features to build AI-powe"
  },
  {
    "path": "docs/documentation/advanced/contextual-enrichment.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/advanced/deduplication.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/collections.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/conversations.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/documents.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/graphs.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/prompts.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/general/users.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "docs/documentation/retrieval/advanced-rag.md",
    "chars": 7893,
    "preview": "R2R supports advanced Retrieval-Augmented Generation (RAG) techniques that can be easily configured at runtime. This fle"
  },
  {
    "path": "docs/documentation/retrieval/agentic-rag.md",
    "chars": 21042,
    "preview": "## Introduction\nR2R's **Agentic RAG** orchestrates multi-step reasoning with Retrieval-Augmented Generation (RAG). By pa"
  },
  {
    "path": "docs/documentation/retrieval/hybrid-search.md",
    "chars": 4643,
    "preview": "## Introduction\n\nR2R's hybrid search blends keyword-based full-text search with semantic vector search, delivering resul"
  },
  {
    "path": "docs/documentation/retrieval/search-and-rag.md",
    "chars": 22319,
    "preview": "R2R provides powerful search and retrieval capabilities through vector search, full-text search, hybrid search, and Retr"
  },
  {
    "path": "docs/introduction/guides/rag.md",
    "chars": 4093,
    "preview": "# More about RAG\n\n**On this page**\n1. [Before you begin](#before-you-begin)\n2. [What is RAG?](#what-is-rag)\n3. [Set up R"
  },
  {
    "path": "docs/introduction/guides/what-is-r2r.md",
    "chars": 3511,
    "preview": "# What is R2R?\n\n**On this page**\n1. [What does R2R do?](#what-does-r2r-do)\n2. [What can R2R do for my applications?](#wh"
  },
  {
    "path": "docs/introduction/system.md",
    "chars": 3326,
    "preview": "# System Architecture\n\nLearn about the R2R system architecture and how its components work together.\n\n## System Overview"
  },
  {
    "path": "js/README.md",
    "chars": 1023,
    "preview": "# R2R JavaScript SDK Documentation\n\nFor the complete look at the R2R JavaScript SDK, [visit our documentation.](https://"
  },
  {
    "path": "js/sdk/.prettierignore",
    "chars": 10,
    "preview": "examples/\n"
  },
  {
    "path": "js/sdk/README.md",
    "chars": 5164,
    "preview": "<p align=\"left\">\n  <a href=\"https://r2r-docs.sciphi.ai\"><img src=\"https://img.shields.io/badge/docs.sciphi.ai-3F16E4\" al"
  },
  {
    "path": "js/sdk/__tests__/ChunksIntegrationSuperUser.test.ts",
    "chars": 5008,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts",
    "chars": 7014,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect, afterAll } from \"@jest/globals\";\ni"
  },
  {
    "path": "js/sdk/__tests__/ConversationsIntegrationSuperUser.test.ts",
    "chars": 9479,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect, afterAll } from \"@jest/globals\";\ni"
  },
  {
    "path": "js/sdk/__tests__/ConversationsIntegrationUser.test.ts",
    "chars": 8324,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/DocumentsAndCollectionsIntegrationUser.test.ts",
    "chars": 14118,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts",
    "chars": 17723,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect, afterAll } from \"@jest/globals\";\ni"
  },
  {
    "path": "js/sdk/__tests__/GraphsIntegrationSuperUser.test.ts",
    "chars": 30183,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect, afterAll } from \"@jest/globals\";\ni"
  },
  {
    "path": "js/sdk/__tests__/PromptsIntegrationSuperUser.test.ts",
    "chars": 1435,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/RetrievalIntegrationSuperUser.test.ts",
    "chars": 5236,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/SystemIntegrationSuperUser.test.ts",
    "chars": 897,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/SystemIntegrationUser.test.ts",
    "chars": 1877,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect } from \"@jest/globals\";\n\nconst base"
  },
  {
    "path": "js/sdk/__tests__/UsersIntegrationSuperUser.test.ts",
    "chars": 14091,
    "preview": "import { r2rClient } from \"../src/index\";\nimport { describe, test, beforeAll, expect, afterAll } from \"@jest/globals\";\ni"
  },
  {
    "path": "js/sdk/__tests__/util/typeTransformer.test.ts",
    "chars": 6174,
    "preview": "import {\n  ensureCamelCase,\n  ensureSnakeCase,\n} from \"../../src/utils/typeTransformer\";\nimport { describe, it, expect }"
  },
  {
    "path": "js/sdk/examples/data/folder/karamozov.txt",
    "chars": 273,
    "preview": "Alexius Fyodorovich Karamazov erat tertius filius Fyodoris Pavlovich Karamazov\npossessoris terrarum in nostro districtu "
  },
  {
    "path": "js/sdk/examples/data/folder/myshkin.txt",
    "chars": 238,
    "preview": "Sub finem Novembris, tempore liquationis, hora nona mane, tramen in via\nferrea Varsaviae et Petropoli plenis velocitatib"
  },
  {
    "path": "js/sdk/examples/data/invalid.json",
    "chars": 277,
    "preview": "{\n  \"name\": \"John Doe\"\n  \"age\": 30,\n  'address': '123 Main St',\n  \"phone_numbers\": [\n    \"555-0123\",\n    \"555-4567\",\n  ]"
  },
  {
    "path": "js/sdk/examples/data/marmeladov.txt",
    "chars": 1324,
    "preview": "His conversation seemed to excite a general though languid interest. The\nboys at the counter fell to sniggering. The inn"
  },
  {
    "path": "js/sdk/examples/data/raskolnikov.txt",
    "chars": 699,
    "preview": "In vespera praecipue calida ineunte Iulio iuvenis e cenaculo in quo hospitabatur in\nS. loco exiit et lente, quasi dubita"
  },
  {
    "path": "js/sdk/examples/data/raskolnikov_2.txt",
    "chars": 444,
    "preview": "When Raskolnikov got home, his hair was soaked with sweat and he was\nbreathing heavily. He went rapidly up the stairs, w"
  },
  {
    "path": "js/sdk/examples/data/sonia.txt",
    "chars": 2700,
    "preview": "On the canal bank near the bridge and not two houses away from the one\nwhere Sonia lodged, there was a crowd of people, "
  },
  {
    "path": "js/sdk/examples/data/zametov.txt",
    "chars": 1268,
    "preview": "“How he keeps on! Are you afraid of having let out some secret? Don’t\nworry yourself; you said nothing about a countess."
  },
  {
    "path": "js/sdk/package.json",
    "chars": 1365,
    "preview": "{\n  \"name\": \"r2r-js\",\n  \"version\": \"0.4.43\",\n  \"description\": \"\",\n  \"main\": \"dist/index.js\",\n  \"browser\": \"dist/index.br"
  },
  {
    "path": "js/sdk/src/baseClient.ts",
    "chars": 7347,
    "preview": "import axios, {\n  AxiosInstance,\n  AxiosRequestConfig,\n  AxiosResponse,\n  Method,\n} from \"axios\";\nimport FormData from \""
  },
  {
    "path": "js/sdk/src/index.ts",
    "chars": 66,
    "preview": "export { r2rClient } from \"./r2rClient\";\nexport * from \"./types\";\n"
  },
  {
    "path": "js/sdk/src/r2rClient.ts",
    "chars": 7190,
    "preview": "import axios, { AxiosError, Method } from \"axios\";\nimport { BaseClient } from \"./baseClient\";\n\nimport { ChunksClient } f"
  },
  {
    "path": "js/sdk/src/types.ts",
    "chars": 12525,
    "preview": "export interface UnprocessedChunk {\n  id: string;\n  documentId?: string;\n  collectionIds: string[];\n  metadata: Record<s"
  },
  {
    "path": "js/sdk/src/utils/index.ts",
    "chars": 60,
    "preview": "export * from \"./typeTransformer\";\nexport * from \"./utils\";\n"
  },
  {
    "path": "js/sdk/src/utils/typeTransformer.ts",
    "chars": 6371,
    "preview": "/**\n * Utility type to convert string to camelCase\n */\ntype CamelCase<S extends string> = S extends `${infer P}_${infer "
  },
  {
    "path": "js/sdk/src/utils/utils.ts",
    "chars": 329,
    "preview": "export function downloadBlob(blob: Blob, filename: string): void {\n  const url = window.URL.createObjectURL(blob);\n  con"
  },
  {
    "path": "js/sdk/src/v3/clients/chunks.ts",
    "chars": 2860,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  UnprocessedChunk,\n  WrappedBooleanResponse,\n  WrappedChunkRespon"
  },
  {
    "path": "js/sdk/src/v3/clients/collections.ts",
    "chars": 9985,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedBooleanResponse,\n  WrappedGenericMessageResponse,\n  Wrapp"
  },
  {
    "path": "js/sdk/src/v3/clients/conversations.ts",
    "chars": 8302,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedBooleanResponse,\n  WrappedConversationMessagesResponse,\n "
  },
  {
    "path": "js/sdk/src/v3/clients/documents.ts",
    "chars": 38739,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport FormData from \"form-data\";\nimport {\n  WrappedBooleanResponse,\n  Wrap"
  },
  {
    "path": "js/sdk/src/v3/clients/graphs.ts",
    "chars": 23825,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedGraphResponse,\n  WrappedBooleanResponse,\n  WrappedGraphsR"
  },
  {
    "path": "js/sdk/src/v3/clients/indices.ts",
    "chars": 2501,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  IndexConfig,\n  WrappedGenericMessageResponse,\n  WrappedVectorInd"
  },
  {
    "path": "js/sdk/src/v3/clients/prompts.ts",
    "chars": 2917,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedBooleanResponse,\n  WrappedGenericMessageResponse,\n  Wrapp"
  },
  {
    "path": "js/sdk/src/v3/clients/retrieval.ts",
    "chars": 11259,
    "preview": "import { r2rClient } from \"../../r2rClient\";\n\nimport {\n  GenerationConfig,\n  Message,\n  SearchSettings,\n  WrappedEmbeddi"
  },
  {
    "path": "js/sdk/src/v3/clients/system.ts",
    "chars": 886,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedGenericMessageResponse,\n  WrappedServerStatsResponse,\n  W"
  },
  {
    "path": "js/sdk/src/v3/clients/users.ts",
    "chars": 14350,
    "preview": "import { r2rClient } from \"../../r2rClient\";\nimport {\n  WrappedAPIKeyResponse,\n  WrappedAPIKeysResponse,\n  WrappedBoolea"
  },
  {
    "path": "js/sdk/tsconfig.json",
    "chars": 588,
    "preview": "{\n  \"compilerOptions\": {\n    \"target\": \"es2017\",\n    \"module\": \"commonjs\",\n    \"outDir\": \"./dist\",\n    \"rootDir\": \"./src"
  },
  {
    "path": "llms.txt",
    "chars": 338881,
    "preview": "# Understanding Internals of R2R Library\n\n## Table of Contents\n\n1. [Introduction](#introduction)\n2. [Installation](#inst"
  },
  {
    "path": "py/.dockerignore",
    "chars": 202,
    "preview": "__pycache__\n*.pyc\n*.pyo\n*.pyd\n.Python\nenv\npip-log.txt\npip-delete-this-directory.txt\n.tox\n.coverage\n.coverage.*\n.cache\nno"
  },
  {
    "path": "py/Dockerfile",
    "chars": 1461,
    "preview": "FROM python:3.12-slim AS builder\n\n# Install system dependencies\nRUN apt-get update && apt-get install -y --no-install-re"
  },
  {
    "path": "py/README.md",
    "chars": 4672,
    "preview": "<img width=\"1217\" alt=\"Screenshot 2025-03-27 at 6 35 02 AM\" src=\"https://github.com/user-attachments/assets/10b530a6-527"
  },
  {
    "path": "py/all_possible_config.toml",
    "chars": 9050,
    "preview": "################################################################################\n# Global Application Settings (AppConfi"
  },
  {
    "path": "py/core/__init__.py",
    "chars": 3932,
    "preview": "import logging\n\n# Keep '*' imports for enhanced development velocity\nfrom .agent import *\nfrom .base import *\nfrom .main"
  },
  {
    "path": "py/core/agent/__init__.py",
    "chars": 864,
    "preview": "# FIXME: Once the agent is properly type annotated, remove the type: ignore comments\nfrom .base import (  # type: ignore"
  },
  {
    "path": "py/core/agent/base.py",
    "chars": 64521,
    "preview": "import asyncio\nimport json\nimport logging\nimport re\nfrom abc import ABCMeta\nfrom typing import AsyncGenerator, Optional,"
  },
  {
    "path": "py/core/agent/rag.py",
    "chars": 11127,
    "preview": "# type: ignore\nimport logging\nfrom typing import Callable, Optional\n\nfrom core.base import (\n    format_search_results_f"
  },
  {
    "path": "py/core/agent/research.py",
    "chars": 26695,
    "preview": "import logging\nimport os\nimport subprocess\nimport sys\nimport tempfile\nfrom copy import copy\nfrom typing import Any, Call"
  },
  {
    "path": "py/core/base/__init__.py",
    "chars": 2765,
    "preview": "from .abstractions import *\nfrom .agent import *\nfrom .api.models import *\nfrom .parsers import *\nfrom .providers import"
  },
  {
    "path": "py/core/base/abstractions/__init__.py",
    "chars": 3357,
    "preview": "from shared.abstractions.base import AsyncSyncMeta, R2RSerializable, syncable\nfrom shared.abstractions.document import ("
  },
  {
    "path": "py/core/base/agent/__init__.py",
    "chars": 262,
    "preview": "# FIXME: Once the agent is properly type annotated, remove the type: ignore comments\nfrom .agent import (  # type: ignor"
  },
  {
    "path": "py/core/base/agent/agent.py",
    "chars": 10121,
    "preview": "# type: ignore\nimport asyncio\nimport json\nimport logging\nfrom abc import ABC, abstractmethod\nfrom datetime import dateti"
  },
  {
    "path": "py/core/base/agent/tools/built_in/get_file_content.py",
    "chars": 2824,
    "preview": "import logging\nfrom typing import Any, Optional\nfrom uuid import UUID\n\nfrom shared.abstractions.tool import Tool\n\nlogger"
  },
  {
    "path": "py/core/base/agent/tools/built_in/search_file_descriptions.py",
    "chars": 2383,
    "preview": "import logging\n\nfrom shared.abstractions.tool import Tool\n\nlogger = logging.getLogger(__name__)\n\n\nclass SearchFileDescri"
  },
  {
    "path": "py/core/base/agent/tools/built_in/search_file_knowledge.py",
    "chars": 2884,
    "preview": "import logging\n\nfrom shared.abstractions.tool import Tool\n\nlogger = logging.getLogger(__name__)\n\n\nclass SearchFileKnowle"
  },
  {
    "path": "py/core/base/agent/tools/built_in/tavily_extract.py",
    "chars": 3855,
    "preview": "import logging\n\nfrom core.utils import (\n    generate_id,\n)\nfrom shared.abstractions.tool import Tool\n\nlogger = logging."
  },
  {
    "path": "py/core/base/agent/tools/built_in/tavily_search.py",
    "chars": 4760,
    "preview": "import logging\n\nfrom core.utils import (\n    generate_id,\n)\nfrom shared.abstractions.tool import Tool\n\nlogger = logging."
  },
  {
    "path": "py/core/base/agent/tools/built_in/web_scrape.py",
    "chars": 2873,
    "preview": "import logging\n\nfrom core.utils import (\n    generate_id,\n)\nfrom shared.abstractions.tool import Tool\n\nlogger = logging."
  },
  {
    "path": "py/core/base/agent/tools/built_in/web_search.py",
    "chars": 1940,
    "preview": "from shared.abstractions.tool import Tool\n\n\nclass WebSearchTool(Tool):\n    \"\"\"\n    A web search tool that uses Serper to"
  },
  {
    "path": "py/core/base/agent/tools/registry.py",
    "chars": 6717,
    "preview": "import importlib\nimport inspect\nimport logging\nimport os\nimport pkgutil\nimport sys\nfrom typing import Callable, Optional"
  },
  {
    "path": "py/core/base/api/models/__init__.py",
    "chars": 5464,
    "preview": "from shared.api.models.auth.responses import (\n    TokenResponse,\n    WrappedTokenResponse,\n)\nfrom shared.api.models.bas"
  },
  {
    "path": "py/core/base/parsers/__init__.py",
    "chars": 71,
    "preview": "from .base_parser import AsyncParser\n\n__all__ = [\n    \"AsyncParser\",\n]\n"
  },
  {
    "path": "py/core/base/parsers/base_parser.py",
    "chars": 293,
    "preview": "\"\"\"Abstract base class for parsers.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import AsyncGenerator, Generic, "
  },
  {
    "path": "py/core/base/providers/__init__.py",
    "chars": 1768,
    "preview": "from .auth import AuthConfig, AuthProvider\nfrom .base import AppConfig, Provider, ProviderConfig\nfrom .crypto import Cry"
  },
  {
    "path": "py/core/base/providers/auth.py",
    "chars": 7969,
    "preview": "import logging\nfrom abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom typing import TYPE_CHECKING, Optio"
  },
  {
    "path": "py/core/base/providers/base.py",
    "chars": 3931,
    "preview": "from abc import ABC, abstractmethod\nfrom typing import Any, Optional, Type\n\nfrom pydantic import BaseModel\n\n\nclass Inner"
  },
  {
    "path": "py/core/base/providers/crypto.py",
    "chars": 3669,
    "preview": "from abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom typing import Optional, Tuple\n\nfrom .base import "
  },
  {
    "path": "py/core/base/providers/database.py",
    "chars": 6342,
    "preview": "\"\"\"Base classes for database providers.\"\"\"\n\nimport logging\nfrom abc import ABC, abstractmethod\nfrom typing import Any, O"
  },
  {
    "path": "py/core/base/providers/email.py",
    "chars": 2622,
    "preview": "import logging\nimport os\nfrom abc import ABC, abstractmethod\nfrom typing import Optional\n\nfrom .base import Provider, Pr"
  },
  {
    "path": "py/core/base/providers/embedding.py",
    "chars": 4802,
    "preview": "import asyncio\nimport logging\nimport random\nimport time\nfrom abc import abstractmethod\nfrom enum import Enum\nfrom typing"
  },
  {
    "path": "py/core/base/providers/file.py",
    "chars": 2890,
    "preview": "import logging\nimport os\nfrom abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom io import BytesIO\nfrom t"
  },
  {
    "path": "py/core/base/providers/ingestion.py",
    "chars": 6204,
    "preview": "import logging\nfrom abc import ABC\nfrom enum import Enum\nfrom typing import TYPE_CHECKING, Any, ClassVar, Optional\n\nfrom"
  },
  {
    "path": "py/core/base/providers/llm.py",
    "chars": 8550,
    "preview": "import asyncio\nimport logging\nimport random\nimport time\nfrom abc import abstractmethod\nfrom concurrent.futures import Th"
  },
  {
    "path": "py/core/base/providers/ocr.py",
    "chars": 3710,
    "preview": "import asyncio\nimport logging\nimport random\nimport time\nfrom abc import abstractmethod\nfrom concurrent.futures import Th"
  },
  {
    "path": "py/core/base/providers/orchestration.py",
    "chars": 1647,
    "preview": "from abc import abstractmethod\nfrom enum import Enum\nfrom typing import Any\n\nfrom .base import Provider, ProviderConfig\n"
  },
  {
    "path": "py/core/base/providers/scheduler.py",
    "chars": 905,
    "preview": "from abc import abstractmethod\n\nfrom .base import Provider, ProviderConfig\n\n\nclass SchedulerConfig(ProviderConfig):\n    "
  },
  {
    "path": "py/core/base/utils/__init__.py",
    "chars": 946,
    "preview": "from shared.utils import (\n    RecursiveCharacterTextSplitter,\n    TextSplitter,\n    _decorate_vector_type,\n    _get_vec"
  },
  {
    "path": "py/core/configs/full.toml",
    "chars": 439,
    "preview": "[completion]\nprovider = \"r2r\"\nconcurrent_request_limit = 128\n\n[ingestion]\nprovider = \"unstructured_local\"\nstrategy = \"au"
  },
  {
    "path": "py/core/configs/full_azure.toml",
    "chars": 1192,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"azure/gpt-4.1-mini\"\n\n# LLM used f"
  },
  {
    "path": "py/core/configs/full_lm_studio.toml",
    "chars": 1487,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"lm_studio/llama-3.2-3b-instruct\"\n"
  },
  {
    "path": "py/core/configs/full_ollama.toml",
    "chars": 1424,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"ollama/llama3.1\"\n\n# LLM used for "
  },
  {
    "path": "py/core/configs/gemini.toml",
    "chars": 457,
    "preview": "[app]\nfast_llm = \"gemini/gemini-2.0-flash-lite\"\nquality_llm = \"gemini/gemini-2.0-flash\"\nvlm = \"gemini/gemini-2.0-flash\"\n"
  },
  {
    "path": "py/core/configs/lm_studio.toml",
    "chars": 1146,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"lm_studio/llama-3.2-3b-instruct\"\n"
  },
  {
    "path": "py/core/configs/ollama.toml",
    "chars": 1242,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"ollama/llama3.1\" ### NOTE - RECOM"
  },
  {
    "path": "py/core/configs/r2r_azure.toml",
    "chars": 589,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"azure/gpt-4.1-mini\"\n\n# LLM used f"
  },
  {
    "path": "py/core/configs/r2r_azure_with_test_limits.toml",
    "chars": 957,
    "preview": "[app]\n# LLM used for internal operations, like deriving conversation names\nfast_llm = \"azure/gpt-4.1-mini\"\n\n# LLM used f"
  },
  {
    "path": "py/core/configs/r2r_with_auth.toml",
    "chars": 253,
    "preview": "[auth]\nprovider = \"r2r\"\naccess_token_lifetime_in_minutes = 60\nrefresh_token_lifetime_in_days = 7\nrequire_authentication "
  },
  {
    "path": "py/core/configs/tavily.toml",
    "chars": 629,
    "preview": "[completion]\nprovider = \"r2r\"\nconcurrent_request_limit = 128\n\n[ingestion]\nprovider = \"unstructured_local\"\nstrategy = \"au"
  },
  {
    "path": "py/core/examples/__init__.py",
    "chars": 0,
    "preview": ""
  }
]

// ... and 301 more files (download for full content)

About this extraction

This page contains the full source code of the SciPhi-AI/R2R GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 501 files (4.5 MB), approximately 1.2M tokens, and a symbol index with 2934 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo