Repository: HKUDS/LightRAG Branch: main Commit: 3131c0e90514 Files: 378 Total size: 6.2 MB Directory structure: gitextract_rq0j1lje/ ├── .clinerules/ │ └── 01-basic.md ├── .dockerignore ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ ├── feature_request.yml │ │ └── question.yml │ ├── dependabot.yml │ ├── pull_request_template.md │ └── workflows/ │ ├── claude.yml │ ├── copilot-setup-steps.yml │ ├── docker-build-lite.yml │ ├── docker-build-manual.yml │ ├── docker-publish.yml │ ├── linting.yaml │ ├── pypi-publish.yml │ ├── stale.yaml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── AGENTS.md ├── Dockerfile ├── Dockerfile.lite ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README-zh.md ├── README.md ├── SECURITY.md ├── config.ini.example ├── docker-build-push.sh ├── docker-compose-full.yml ├── docker-compose.yml ├── docs/ │ ├── Algorithm.md │ ├── DockerDeployment.md │ ├── FrontendBuildGuide.md │ ├── InteractiveSetup.md │ ├── LightRAG_concurrent_explain.md │ ├── MilvusConfigurationGuide.md │ ├── OfflineDeployment.md │ └── UV_LOCK_GUIDE.md ├── env.example ├── examples/ │ ├── generate_query.py │ ├── graph_visual_with_html.py │ ├── graph_visual_with_neo4j.py │ ├── graph_visual_with_opensearch.py │ ├── insert_custom_kg.py │ ├── lightrag_azure_openai_demo.py │ ├── lightrag_gemini_demo.py │ ├── lightrag_gemini_postgres_demo.py │ ├── lightrag_gemini_workspace_demo.py │ ├── lightrag_ollama_demo.py │ ├── lightrag_openai_compatible_demo.py │ ├── lightrag_openai_demo.py │ ├── lightrag_openai_mongodb_graph_demo.py │ ├── lightrag_openai_opensearch_graph_demo.py │ ├── lightrag_vllm_demo.py │ ├── milvus_kwargs_configuration_demo.py │ ├── modalprocessors_example.py │ ├── opensearch_storage_demo.py │ ├── raganything_example.py │ ├── rerank_example.py │ └── unofficial-sample/ │ ├── copy_llm_cache_to_another_storage.py │ ├── lightrag_bedrock_demo.py │ ├── lightrag_cloudflare_demo.py │ ├── lightrag_hf_demo.py │ ├── 
lightrag_llamaindex_direct_demo.py │ ├── lightrag_llamaindex_litellm_demo.py │ ├── lightrag_llamaindex_litellm_opik_demo.py │ ├── lightrag_lmdeploy_demo.py │ ├── lightrag_nvidia_demo.py │ └── lightrag_openai_neo4j_milvus_redis_demo.py ├── k8s-deploy/ │ ├── README-zh.md │ ├── README.md │ ├── databases/ │ │ ├── 00-config.sh │ │ ├── 01-prepare.sh │ │ ├── 02-install-database.sh │ │ ├── 03-uninstall-database.sh │ │ ├── 04-cleanup.sh │ │ ├── README.md │ │ ├── elasticsearch/ │ │ │ └── values.yaml │ │ ├── install-kubeblocks.sh │ │ ├── mongodb/ │ │ │ └── values.yaml │ │ ├── neo4j/ │ │ │ └── values.yaml │ │ ├── postgresql/ │ │ │ └── values.yaml │ │ ├── qdrant/ │ │ │ └── values.yaml │ │ ├── redis/ │ │ │ └── values.yaml │ │ ├── scripts/ │ │ │ └── common.sh │ │ └── uninstall-kubeblocks.sh │ ├── install_lightrag.sh │ ├── install_lightrag_dev.sh │ ├── lightrag/ │ │ ├── .helmignore │ │ ├── Chart.yaml │ │ ├── templates/ │ │ │ ├── NOTES.txt │ │ │ ├── _helpers.tpl │ │ │ ├── deployment.yaml │ │ │ ├── pvc.yaml │ │ │ ├── secret.yaml │ │ │ └── service.yaml │ │ └── values.yaml │ ├── uninstall_lightrag.sh │ └── uninstall_lightrag_dev.sh ├── lightrag/ │ ├── __init__.py │ ├── api/ │ │ ├── .gitignore │ │ ├── README-zh.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── auth.py │ │ ├── config.py │ │ ├── gunicorn_config.py │ │ ├── lightrag_server.py │ │ ├── routers/ │ │ │ ├── __init__.py │ │ │ ├── document_routes.py │ │ │ ├── graph_routes.py │ │ │ ├── ollama_api.py │ │ │ └── query_routes.py │ │ ├── run_with_gunicorn.py │ │ ├── runtime_validation.py │ │ ├── static/ │ │ │ └── swagger-ui/ │ │ │ ├── swagger-ui-bundle.js │ │ │ └── swagger-ui.css │ │ └── utils_api.py │ ├── base.py │ ├── constants.py │ ├── evaluation/ │ │ ├── README_EVALUASTION_RAGAS.md │ │ ├── __init__.py │ │ ├── eval_rag_quality.py │ │ ├── sample_dataset.json │ │ └── sample_documents/ │ │ ├── 01_lightrag_overview.md │ │ ├── 02_rag_architecture.md │ │ ├── 03_lightrag_improvements.md │ │ ├── 04_supported_databases.md │ │ ├── 
05_evaluation_and_deployment.md │ │ └── README.md │ ├── exceptions.py │ ├── kg/ │ │ ├── __init__.py │ │ ├── deprecated/ │ │ │ └── chroma_impl.py │ │ ├── faiss_impl.py │ │ ├── json_doc_status_impl.py │ │ ├── json_kv_impl.py │ │ ├── memgraph_impl.py │ │ ├── milvus_impl.py │ │ ├── mongo_impl.py │ │ ├── nano_vector_db_impl.py │ │ ├── neo4j_impl.py │ │ ├── networkx_impl.py │ │ ├── opensearch_impl.py │ │ ├── postgres_impl.py │ │ ├── qdrant_impl.py │ │ ├── redis_impl.py │ │ └── shared_storage.py │ ├── lightrag.py │ ├── llm/ │ │ ├── __init__.py │ │ ├── anthropic.py │ │ ├── azure_openai.py │ │ ├── bedrock.py │ │ ├── binding_options.py │ │ ├── deprecated/ │ │ │ └── siliconcloud.py │ │ ├── gemini.py │ │ ├── hf.py │ │ ├── jina.py │ │ ├── llama_index_impl.py │ │ ├── lmdeploy.py │ │ ├── lollms.py │ │ ├── nvidia_openai.py │ │ ├── ollama.py │ │ ├── openai.py │ │ └── zhipu.py │ ├── namespace.py │ ├── operate.py │ ├── prompt.py │ ├── rerank.py │ ├── tools/ │ │ ├── README_CLEAN_LLM_QUERY_CACHE.md │ │ ├── README_MIGRATE_LLM_CACHE.md │ │ ├── __init__.py │ │ ├── check_initialization.py │ │ ├── clean_llm_query_cache.py │ │ ├── download_cache.py │ │ ├── lightrag_visualizer/ │ │ │ ├── README-zh.md │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── assets/ │ │ │ │ ├── LICENSE - Geist.txt │ │ │ │ ├── LICENSE - SmileySans.txt │ │ │ │ └── place_font_here │ │ │ ├── graph_visualizer.py │ │ │ └── requirements.txt │ │ ├── migrate_llm_cache.py │ │ └── prepare_qdrant_legacy_data.py │ ├── types.py │ ├── utils.py │ └── utils_graph.py ├── lightrag.service.example ├── lightrag_webui/ │ ├── .gitignore │ ├── .prettierrc.json │ ├── README.md │ ├── components.json │ ├── env.development.smaple │ ├── env.local.sample │ ├── eslint.config.js │ ├── index.html │ ├── package.json │ ├── src/ │ │ ├── App.tsx │ │ ├── AppRouter.tsx │ │ ├── api/ │ │ │ └── lightrag.ts │ │ ├── components/ │ │ │ ├── ApiKeyAlert.tsx │ │ │ ├── AppSettings.tsx │ │ │ ├── LanguageToggle.tsx │ │ │ ├── Root.tsx │ │ │ ├── ThemeProvider.tsx │ │ │ 
├── ThemeToggle.tsx │ │ │ ├── documents/ │ │ │ │ ├── ClearDocumentsDialog.tsx │ │ │ │ ├── DeleteDocumentsDialog.tsx │ │ │ │ ├── PipelineStatusDialog.tsx │ │ │ │ └── UploadDocumentsDialog.tsx │ │ │ ├── graph/ │ │ │ │ ├── EditablePropertyRow.tsx │ │ │ │ ├── FocusOnNode.tsx │ │ │ │ ├── FullScreenControl.tsx │ │ │ │ ├── GraphControl.tsx │ │ │ │ ├── GraphLabels.tsx │ │ │ │ ├── GraphSearch.tsx │ │ │ │ ├── LayoutsControl.tsx │ │ │ │ ├── Legend.tsx │ │ │ │ ├── LegendButton.tsx │ │ │ │ ├── MergeDialog.tsx │ │ │ │ ├── PropertiesView.tsx │ │ │ │ ├── PropertyEditDialog.tsx │ │ │ │ ├── PropertyRowComponents.tsx │ │ │ │ ├── Settings.tsx │ │ │ │ ├── SettingsDisplay.tsx │ │ │ │ └── ZoomControl.tsx │ │ │ ├── retrieval/ │ │ │ │ ├── ChatMessage.tsx │ │ │ │ └── QuerySettings.tsx │ │ │ ├── status/ │ │ │ │ ├── StatusCard.tsx │ │ │ │ ├── StatusDialog.tsx │ │ │ │ └── StatusIndicator.tsx │ │ │ └── ui/ │ │ │ ├── Alert.tsx │ │ │ ├── AlertDialog.tsx │ │ │ ├── AsyncSearch.tsx │ │ │ ├── AsyncSelect.tsx │ │ │ ├── Badge.tsx │ │ │ ├── Button.tsx │ │ │ ├── Card.tsx │ │ │ ├── Checkbox.tsx │ │ │ ├── Command.tsx │ │ │ ├── DataTable.tsx │ │ │ ├── Dialog.tsx │ │ │ ├── EmptyCard.tsx │ │ │ ├── FileUploader.tsx │ │ │ ├── Input.tsx │ │ │ ├── NumberInput.tsx │ │ │ ├── PaginationControls.tsx │ │ │ ├── Popover.tsx │ │ │ ├── Progress.tsx │ │ │ ├── ScrollArea.tsx │ │ │ ├── Select.tsx │ │ │ ├── Separator.tsx │ │ │ ├── TabContent.tsx │ │ │ ├── Table.tsx │ │ │ ├── Tabs.tsx │ │ │ ├── Text.tsx │ │ │ ├── Textarea.tsx │ │ │ ├── Tooltip.tsx │ │ │ └── UserPromptInputWithHistory.tsx │ │ ├── contexts/ │ │ │ ├── TabVisibilityProvider.tsx │ │ │ ├── context.ts │ │ │ ├── types.ts │ │ │ └── useTabVisibility.ts │ │ ├── features/ │ │ │ ├── ApiSite.tsx │ │ │ ├── DocumentManager.tsx │ │ │ ├── GraphViewer.tsx │ │ │ ├── LoginPage.tsx │ │ │ ├── RetrievalTesting.tsx │ │ │ └── SiteHeader.tsx │ │ ├── hooks/ │ │ │ ├── useDebounce.tsx │ │ │ ├── useLightragGraph.tsx │ │ │ ├── useRandomGraph.tsx │ │ │ └── useTheme.tsx │ │ ├── i18n.ts │ │ ├── 
index.css │ │ ├── lib/ │ │ │ ├── constants.ts │ │ │ ├── extensions.ts │ │ │ └── utils.ts │ │ ├── locales/ │ │ │ ├── ar.json │ │ │ ├── de.json │ │ │ ├── en.json │ │ │ ├── fr.json │ │ │ ├── ja.json │ │ │ ├── ko.json │ │ │ ├── ru.json │ │ │ ├── uk.json │ │ │ ├── vi.json │ │ │ ├── zh.json │ │ │ └── zh_TW.json │ │ ├── main.tsx │ │ ├── services/ │ │ │ └── navigation.ts │ │ ├── stores/ │ │ │ ├── graph.ts │ │ │ ├── settings.ts │ │ │ └── state.ts │ │ ├── types/ │ │ │ └── katex.d.ts │ │ ├── utils/ │ │ │ ├── SearchHistoryManager.ts │ │ │ ├── clipboard.ts │ │ │ ├── graphColor.ts │ │ │ └── remarkFootnotes.ts │ │ └── vite-env.d.ts │ ├── tailwind.config.js │ ├── tsconfig.json │ └── vite.config.ts ├── pyproject.toml ├── reproduce/ │ ├── Step_0.py │ ├── Step_1.py │ ├── Step_1_openai_compatible.py │ ├── Step_2.py │ ├── Step_3.py │ ├── Step_3_openai_compatible.py │ └── batch_eval.py ├── requirements-offline-llm.txt ├── requirements-offline-storage.txt ├── requirements-offline.txt ├── scripts/ │ ├── setup/ │ │ ├── lib/ │ │ │ ├── file_ops.sh │ │ │ ├── presets.sh │ │ │ ├── prompts.sh │ │ │ ├── storage_requirements.sh │ │ │ └── validation.sh │ │ ├── setup.sh │ │ └── templates/ │ │ ├── memgraph.yml │ │ ├── milvus-gpu.yml │ │ ├── milvus.yml │ │ ├── mongodb.yml │ │ ├── neo4j.yml │ │ ├── opensearch.yml │ │ ├── postgres.yml │ │ ├── qdrant-gpu.yml │ │ ├── qdrant.yml │ │ ├── redis.conf.template │ │ ├── redis.yml │ │ ├── vllm-embed-gpu.yml │ │ ├── vllm-embed.yml │ │ ├── vllm-rerank-gpu.yml │ │ └── vllm-rerank.yml │ └── test.sh ├── setup.py └── tests/ ├── README_WORKSPACE_ISOLATION_TESTS.md ├── conftest.py ├── test_aquery_data_endpoint.py ├── test_batch_embeddings.py ├── test_chunking.py ├── test_curl_aquery_data.sh ├── test_description_api_validation.py ├── test_dimension_mismatch.py ├── test_doc_status_chunk_preservation.py ├── test_document_file_path_normalization.py ├── test_extract_entities.py ├── test_faiss_meta_inconsistency.py ├── test_graph_storage.py ├── 
test_interactive_setup_outputs.py ├── test_lightrag_ollama_chat.py ├── test_llm_cache_tools_opensearch.py ├── test_milvus_index_config.py ├── test_milvus_index_creation.py ├── test_milvus_kwargs_bridge.py ├── test_neo4j_fulltext_index.py ├── test_no_model_suffix_safety.py ├── test_opensearch_storage.py ├── test_overlap_validation.py ├── test_postgres_index_name.py ├── test_postgres_migration.py ├── test_postgres_retry_integration.py ├── test_postgres_upsert.py ├── test_qdrant_migration.py ├── test_qdrant_upsert_batching.py ├── test_rerank_chunking.py ├── test_runtime_target_validation.py ├── test_token_auto_renewal.py ├── test_unified_lock_safety.py ├── test_workspace_isolation.py ├── test_workspace_migration_isolation.py ├── test_workspace_sanitization.py ├── test_write_json_optimization.py └── test_zhipu_llm.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clinerules/01-basic.md ================================================ # LightRAG Project Intelligence (.clinerules) ## Project Overview LightRAG is a mature, production-ready Retrieval-Augmented Generation (RAG) system with comprehensive knowledge graph capabilities. The system has evolved from experimental to production-ready status with extensive functionality across all major components. ## Current System State (August 15, 2025) - **Status**: Production Ready - Stable and Mature - **Configuration**: Gemini 2.5 Flash + BAAI/bge-m3 embeddings via custom endpoints - **Storage**: Default in-memory with file persistence (JsonKVStorage, NetworkXStorage, NanoVectorDBStorage) - **Language**: Chinese for summaries - **Workspace**: `space1` for data isolation - **Authentication**: JWT-based with admin/user accounts ## Critical Implementation Patterns ### 1. 
Embedding Format Compatibility (CRITICAL) **Pattern**: Always handle both base64 and raw array embedding formats **Location**: `lightrag/llm/openai.py` - `openai_embed` function **Issue**: Custom OpenAI-compatible endpoints return embeddings as raw arrays, not base64 strings **Solution**: ```python np.array(dp.embedding, dtype=np.float32) if isinstance(dp.embedding, list) else np.frombuffer(base64.b64decode(dp.embedding), dtype=np.float32) ``` **Impact**: Document processing fails completely without this dual format support ### 2. Async Pattern Consistency (CRITICAL) **Pattern**: Always await coroutines before calling methods on the result **Common Error**: `coroutine.method()` instead of `(await coroutine).method()` **Locations**: MongoDB implementations, Neo4j operations **Example**: `await self._data.list_indexes()` then `await cursor.to_list()` ### 3. Storage Layer Data Compatibility (CRITICAL) **Pattern**: Always filter deprecated/incompatible fields during deserialization **Common Fields to Remove**: `content`, `_id` (MongoDB), database-specific fields **Implementation**: `data.pop('field_name', None)` before creating dataclass objects **Locations**: All storage implementations (JSON, Redis, MongoDB, PostgreSQL) ### 4. Lock Key Generation (CRITICAL) **Pattern**: Always sort relationship pairs for consistent lock keys **Implementation**: `sorted_key_parts = sorted([src, tgt])` then `f"{sorted_key_parts[0]}-{sorted_key_parts[1]}"` **Impact**: Prevents deadlocks in concurrent relationship processing ### 5. Event Loop Management (CRITICAL) **Pattern**: Handle event loop mismatches during shutdown gracefully **Implementation**: Timeout + specific RuntimeError handling for "attached to a different loop" **Location**: Neo4j storage finalization **Impact**: Prevents application shutdown failures ### 6. 
Async Generator Lock Management (CRITICAL) **Pattern**: Never hold locks across async generator yields - create snapshots instead **Issue**: Holding locks while yielding causes deadlock when consumers need the same lock **Location**: `lightrag/tools/migrate_llm_cache.py` - `stream_default_caches_json` **Solution**: Create snapshot of data while holding lock, release lock, then iterate over snapshot ```python # WRONG - Deadlock prone: async with storage._storage_lock: for key, value in storage._data.items(): batch[key] = value if len(batch) >= batch_size: yield batch # Lock still held! # CORRECT - Snapshot approach: async with storage._storage_lock: matching_items = [(k, v) for k, v in storage._data.items() if condition] # Lock released here for key, value in matching_items: batch[key] = value if len(batch) >= batch_size: yield batch # No lock held ``` **Impact**: Prevents deadlocks in Json→Json migrations and similar scenarios where source/target share locks **Applicable To**: Any async generator that needs to access shared resources while yielding ## Architecture Patterns ### 1. Dependency Injection **Pattern**: Pass configuration through object constructors, not direct imports **Example**: OllamaAPI receives configuration through LightRAG object **Benefit**: Better testability and modularity ### 2. Memory Bank Documentation **Pattern**: Maintain comprehensive memory bank for development continuity **Structure**: Core files (projectbrief.md, activeContext.md, progress.md, etc.) **Purpose**: Essential for context preservation across development sessions ### 3. Configuration Management **Pattern**: Centralize defaults in constants.py, use environment variables for runtime config **Implementation**: Default values in constants, override via .env file **Benefit**: Consistent configuration across components ## Development Workflow Patterns ### 1. 
Frontend Development (CRITICAL) **Package Manager**: **ALWAYS USE BUN** - Never use npm or yarn unless Bun is unavailable **Commands**: - `bun install` - Install dependencies - `bun run dev` - Start development server - `bun run build` - Build for production - `bun run lint` - Run linting - `bun test` - Run tests - `bun run preview` - Preview production build **Pattern**: All frontend operations must use Bun commands **Fallback**: Only use npm/yarn if Bun installation fails **Testing**: Use `bun test` for all frontend testing ### 2. Bug Fix Approach 1. **Identify root cause** - Don't just fix symptoms 2. **Implement robust solution** - Handle edge cases and format variations 3. **Maintain backward compatibility** - Preserve existing functionality 4. **Add comprehensive error handling** - Graceful degradation 5. **Document the fix** - Update memory bank with technical details ### 3. Feature Implementation 1. **Follow existing patterns** - Maintain architectural consistency 2. **Use dependency injection** - Avoid direct imports between modules 3. **Implement comprehensive error handling** - Handle all failure modes 4. **Add proper logging** - Debug and warning messages 5. **Update documentation** - Memory bank and code comments 6. **Comment Language** - Use English for comments and documentation ### 4. Performance Optimization 1. **Profile before optimizing** - Identify actual bottlenecks 2. **Maintain algorithmic correctness** - Don't sacrifice functionality for speed 3. **Use appropriate data structures** - Match structure to access patterns 4. **Implement caching strategically** - Cache expensive operations 5. **Monitor memory usage** - Prevent memory leaks ### 5. 
Testing Workflow (CRITICAL) **Pattern**: All tests must use pytest markers for proper CI/CD execution **Test Categories**: - **Offline Tests**: Use `@pytest.mark.offline` - No external dependencies (runs in CI) - **Integration Tests**: Use `@pytest.mark.integration` - Requires databases/APIs (skipped by default) **Commands**: - `pytest tests/ -m offline -v` - CI default (~3 seconds for 21 tests) - `pytest tests/ --run-integration -v` - Full test suite (all 46 tests) **Best Practices**: 1. **Prefer offline tests** - Use mocks for LLM, embeddings, databases 2. **Mock external dependencies** - AsyncMock for async functions 3. **Test isolation** - Each test should be independent 4. **Documentation** - Add docstrings explaining purpose and scope **Configuration**: - `tests/pytest.ini` - Marker definitions and test discovery - `tests/conftest.py` - Fixtures and custom options - `.github/workflows/tests.yml` - CI/CD workflow (Python 3.10/3.11/3.12) **Documentation**: See `memory-bank/testing-guidelines.md` for complete testing guidelines **Impact**: Ensures all tests run reliably in CI without external services while maintaining comprehensive integration test coverage for local development ## Technology Stack Intelligence ### 1. LLM Integration - **Primary**: Gemini 2.5 Flash via custom endpoint - **Embedding**: BAAI/bge-m3 via custom endpoint - **Reranking**: BAAI/bge-reranker-v2-m3 - **Pattern**: Always handle multiple provider formats ### 2. Storage Backends - **Default**: In-memory with file persistence - **Production Options**: PostgreSQL, MongoDB, Redis, Neo4j - **Pattern**: Abstract storage interface with multiple implementations ### 3. API Architecture - **Framework**: FastAPI with Gunicorn for production - **Authentication**: JWT-based with role support - **Compatibility**: Ollama-compatible endpoints for easy integration ### 4. 
Frontend - **Framework**: React with TypeScript - **Package Manager**: **BUN (REQUIRED)** - Always use Bun for all frontend operations - **Build Tool**: Vite with Bun runtime - **Visualization**: Sigma.js for graph rendering - **State Management**: React hooks with context - **Internationalization**: i18next for multi-language support ## Common Pitfalls and Solutions ### 1. Embedding Format Issues **Pitfall**: Assuming all endpoints return base64-encoded embeddings **Solution**: Always check format and handle both base64 and raw arrays ### 2. Async/Await Patterns **Pitfall**: Calling methods on coroutines instead of awaited results **Solution**: Always await coroutines before accessing their methods ### 3. Data Model Evolution **Pitfall**: Breaking changes when removing fields from dataclasses **Solution**: Filter deprecated fields during deserialization, don't break storage ### 4. Concurrency Issues **Pitfall**: Inconsistent lock key generation causing deadlocks **Solution**: Always sort keys for deterministic lock ordering ### 5. Event Loop Management **Pitfall**: Event loop mismatches during shutdown **Solution**: Implement timeout and specific error handling for loop issues ## Performance Considerations ### 1. Query Context Building - **Algorithm**: Linear gradient weighted polling for fair resource allocation - **Optimization**: Round-robin merging to eliminate mode bias - **Pattern**: Smart chunk selection based on cross-entity occurrence ### 2. Graph Operations - **Optimization**: Batch operations where possible - **Pattern**: Use appropriate indexing for large datasets - **Consideration**: Memory usage with large graphs ### 3. LLM Request Management - **Pattern**: Priority-based queue for request ordering - **Optimization**: Connection pooling and retry mechanisms - **Consideration**: Rate limiting and cost management ## Security Patterns ### 1. 
Authentication - **Implementation**: JWT tokens with role-based access - **Pattern**: Stateless authentication with configurable expiration - **Security**: Proper token validation and refresh mechanisms ### 2. API Security - **Pattern**: Input validation and sanitization - **Implementation**: FastAPI dependency injection for auth - **Consideration**: Rate limiting and abuse prevention ## Maintenance Guidelines ### 1. Memory Bank Updates - **Trigger**: After significant changes or bug fixes - **Pattern**: Update activeContext.md and progress.md - **Purpose**: Maintain development continuity ### 2. Configuration Management - **Pattern**: Environment-based configuration with sensible defaults - **Implementation**: .env files with example templates - **Consideration**: Security for production deployments ### 3. Error Handling - **Pattern**: Comprehensive logging with appropriate levels - **Implementation**: Graceful degradation where possible - **Consideration**: User-friendly error messages ## Project Evolution Notes The project has evolved from experimental to production-ready status. Key milestones: - **Early 2025**: Basic RAG implementation - **Mid 2025**: Multiple storage backends and LLM providers - **July 2025**: Major query optimization and algorithm improvements - **August 2025**: Production-ready stable state The system now supports enterprise-level deployments with comprehensive functionality across all components. 
================================================ FILE: .dockerignore ================================================ # Python-related files and directories __pycache__ .cache # Virtual environment directories *.venv # Env env/ *.env* .env_example # Distribution / build files site dist/ build/ .eggs/ *.egg-info/ *.tgz *.tar.gz # Exclude files and folders *.yml .dockerignore Dockerfile Makefile # Exclude other projects /tests /scripts /data /dickens /reproduce /output_complete /rag_storage /inputs # Python version manager file .python-version # Reports *.coverage/ *.log log/ *.logfire # Cache .cache/ .mypy_cache .pytest_cache .ruff_cache .gradio .logfire temp/ # MacOS-related files .DS_Store # VS Code settings (local configuration files) .vscode # file TODO.md # Exclude Git-related files .git .github .gitignore .pre-commit-config.yaml ================================================ FILE: .gitattributes ================================================ lightrag/api/webui/** binary lightrag/api/webui/** linguist-generated ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug Report description: File a bug report title: "[Bug]:" labels: ["bug", "triage"] body: - type: checkboxes id: existingcheck attributes: label: Do you need to file an issue? description: Please help us manage our time by avoiding duplicates and common bugs with the steps below. options: - label: I have searched the existing issues and this bug is not already filed. - label: I believe this is a legitimate bug, not just a question or feature request. - type: textarea id: description attributes: label: Describe the bug description: A clear and concise description of what the bug is. placeholder: What went wrong? - type: textarea id: reproduce attributes: label: Steps to reproduce description: Steps to reproduce the behavior. placeholder: How can we replicate the issue? 
- type: textarea id: expected_behavior attributes: label: Expected Behavior description: A clear and concise description of what you expected to happen. placeholder: What should have happened? - type: textarea id: configused attributes: label: LightRAG Config Used description: The LightRAG configuration used for the run. placeholder: The settings content or LightRAG configuration value: | # Paste your config here - type: textarea id: screenshotslogs attributes: label: Logs and screenshots description: If applicable, add screenshots and logs to help explain your problem. placeholder: Add logs and screenshots here - type: textarea id: additional_information attributes: label: Additional Information description: | - LightRAG Version: e.g., v0.1.1 - Operating System: e.g., Windows 10, Ubuntu 20.04 - Python Version: e.g., 3.8 - Related Issues: e.g., #1 - Any other relevant information. value: | - LightRAG Version: - Operating System: - Python Version: - Related Issues: ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: Feature Request description: File a feature request labels: ["enhancement"] title: "[Feature Request]:" body: - type: checkboxes id: existingcheck attributes: label: Do you need to file a feature request? description: Please help us manage our time by avoiding duplicates and common feature requests with the steps below. options: - label: I have searched the existing feature requests and this feature request is not already filed. - label: I believe this is a legitimate feature request, not just a question or bug. - type: textarea id: feature_request_description attributes: label: Feature Request Description description: A clear and concise description of the feature request you would like. 
placeholder: What would this feature add or improve? - type: textarea id: additional_context attributes: label: Additional Context description: Add any other context or screenshots about the feature request here. placeholder: Any additional information ================================================ FILE: .github/ISSUE_TEMPLATE/question.yml ================================================ name: Question description: Ask a general question labels: ["question"] title: "[Question]:" body: - type: checkboxes id: existingcheck attributes: label: Do you need to ask a question? description: Please help us manage our time by avoiding duplicates and common questions with the steps below. options: - label: I have searched the existing questions and discussions and this question is not already answered. - label: I believe this is a legitimate question, not just a bug or feature request. - type: textarea id: question attributes: label: Your Question description: A clear and concise description of your question. placeholder: What is your question? - type: textarea id: context attributes: label: Additional Context description: Provide any additional context or details that might help us understand your question better. placeholder: Add any relevant information here ================================================ FILE: .github/dependabot.yml ================================================ # Keep GitHub Actions up to date with GitHub's Dependabot... 
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem version: 2 updates: # ============================================================ # GitHub Actions # PR Strategy: # - All updates (major/minor/patch): Grouped into a single PR # ============================================================ - package-ecosystem: github-actions directory: / groups: github-actions: patterns: - "*" # Group all Actions updates into a single larger pull request schedule: interval: weekly day: monday time: "02:00" timezone: "Asia/Shanghai" labels: - "dependencies" - "github-actions" open-pull-requests-limit: 2 # ============================================================ # Python (pip) Dependencies # PR Strategy: # - Major updates: Individual PR per package (except numpy which is ignored) # - Minor updates: Grouped by category (llm-providers, storage, etc.) 
# - Patch updates: Grouped by category # ============================================================ - package-ecosystem: "pip" directory: "/" schedule: interval: "weekly" day: "wednesday" time: "02:00" timezone: "Asia/Shanghai" cooldown: default-days: 5 semver-major-days: 30 semver-minor-days: 7 semver-patch-days: 3 groups: # Core dependencies - LLM providers and embeddings llm-providers: patterns: - "openai" - "anthropic" - "google-*" - "boto3" - "botocore" - "ollama" update-types: - "minor" - "patch" # Storage backends storage: patterns: - "neo4j" - "pymongo" - "redis" - "psycopg*" - "asyncpg" - "milvus*" - "qdrant*" update-types: - "minor" - "patch" # Data processing and ML data-processing: patterns: - "numpy" - "scipy" - "pandas" - "tiktoken" - "transformers" - "torch*" update-types: - "minor" - "patch" # Web framework and API web-framework: patterns: - "fastapi" - "uvicorn" - "gunicorn" - "starlette" - "pydantic*" update-types: - "minor" - "patch" # Development and testing tools dev-tools: patterns: - "pytest*" - "ruff" - "pre-commit" - "black" - "mypy" update-types: - "minor" - "patch" # Minor and patch updates for everything else python-minor-patch: patterns: - "*" update-types: - "minor" - "patch" ignore: - dependency-name: "numpy" update-types: - "version-update:semver-major" labels: - "dependencies" - "python" open-pull-requests-limit: 5 # ============================================================ # Frontend (bun) Dependencies # PR Strategy: # - Major updates: Individual PR per package # - Minor updates: Grouped by category (react, ui-components, etc.) 
# - Patch updates: Grouped by category # ============================================================ - package-ecosystem: "bun" directory: "/lightrag_webui" schedule: interval: "weekly" day: "friday" time: "02:00" timezone: "Asia/Shanghai" cooldown: default-days: 5 semver-major-days: 30 semver-minor-days: 7 semver-patch-days: 3 groups: # React ecosystem react: patterns: - "react" - "react-dom" - "react-router*" - "@types/react*" update-types: - "minor" - "patch" # UI components and styling ui-components: patterns: - "@radix-ui/*" - "tailwind*" - "@tailwindcss/*" - "lucide-react" - "class-variance-authority" - "clsx" update-types: - "minor" - "patch" # Graph visualization graph-viz: patterns: - "sigma" - "@sigma/*" - "graphology*" update-types: - "minor" - "patch" # Build tools and dev dependencies build-tools: patterns: - "vite" - "@vitejs/*" - "typescript" - "eslint*" - "@eslint/*" - "typescript-eslint" - "prettier" - "prettier-*" - "@types/bun" update-types: - "minor" - "patch" # Content rendering libraries (math, diagrams, etc.) content-rendering: patterns: - "katex" - "mermaid" update-types: - "minor" - "patch" # All other minor and patch updates frontend-minor-patch: patterns: - "*" update-types: - "minor" - "patch" labels: - "dependencies" - "frontend" open-pull-requests-limit: 5 ================================================ FILE: .github/pull_request_template.md ================================================ ## Description [Briefly describe the changes made in this pull request.] ## Related Issues [Reference any related issues or tasks addressed by this pull request.] ## Changes Made [List the specific changes made in this pull request.] ## Checklist - [ ] Changes tested locally - [ ] Code reviewed - [ ] Documentation updated (if necessary) - [ ] Unit tests added (if applicable) ## Additional Notes [Add any additional notes or context for the reviewer(s).] 
================================================ FILE: .github/workflows/claude.yml ================================================ name: Claude Code on: issue_comment: types: [created] pull_request_review_comment: types: [created] issues: types: [opened, assigned] pull_request_review: types: [submitted] jobs: claude: if: | ( (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) ) && ( github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.review.author_association == 'OWNER' || github.event.review.author_association == 'MEMBER' || github.event.review.author_association == 'COLLABORATOR' || github.event.issue.author_association == 'OWNER' || github.event.issue.author_association == 'MEMBER' || github.event.issue.author_association == 'COLLABORATOR' ) runs-on: ubuntu-latest permissions: contents: write pull-requests: write issues: write id-token: write actions: read # Required for Claude to read CI results on PRs steps: - name: Get PR details for checkout if: github.event.issue.pull_request || github.event_name == 'pull_request_review_comment' || github.event_name == 'pull_request_review' id: pr_details env: GH_TOKEN: ${{ github.token }} run: | # Get PR number from the event if [ "${{ github.event_name }}" == "issue_comment" ]; then PR_NUMBER=${{ github.event.issue.number }} elif [ "${{ github.event_name }}" == "pull_request_review_comment" ]; then PR_NUMBER=${{ github.event.pull_request.number }} elif [ "${{ github.event_name }}" == "pull_request_review" ]; then 
PR_NUMBER=${{ github.event.pull_request.number }} fi if [ -n "$PR_NUMBER" ]; then echo "Fetching PR #$PR_NUMBER details" PR_DATA=$(gh pr view $PR_NUMBER -R ${{ github.repository }} --json headRefName,headRepository,headRepositoryOwner) HEAD_REF=$(echo "$PR_DATA" | jq -r '.headRefName') HEAD_REPO=$(echo "$PR_DATA" | jq -r '.headRepository.name') HEAD_OWNER=$(echo "$PR_DATA" | jq -r '.headRepositoryOwner.login') echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT echo "head_ref=$HEAD_REF" >> $GITHUB_OUTPUT echo "head_repo=$HEAD_REPO" >> $GITHUB_OUTPUT echo "head_owner=$HEAD_OWNER" >> $GITHUB_OUTPUT echo "repository=$HEAD_OWNER/$HEAD_REPO" >> $GITHUB_OUTPUT fi - name: Checkout repository uses: actions/checkout@v6 with: repository: ${{ steps.pr_details.outputs.repository || github.repository }} ref: ${{ steps.pr_details.outputs.head_ref || github.ref }} fetch-depth: 0 - name: Run Claude Code id: claude uses: anthropics/claude-code-action@v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} # This is an optional setting that allows Claude to read CI results on PRs additional_permissions: | actions: read # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. # prompt: 'Update the pull request description to include a summary of changes.' 
# Optional: Add claude_args to customize behavior and configuration # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md # or https://code.claude.com/docs/en/cli-reference for available options # claude_args: '--allowed-tools Bash(gh pr:*)' ================================================ FILE: .github/workflows/copilot-setup-steps.yml ================================================ name: "Copilot Setup Steps" # Automatically run the setup steps when they are changed to allow for easy validation, and # allow manual testing through the repository's "Actions" tab on: workflow_dispatch: push: paths: - .github/workflows/copilot-setup-steps.yml pull_request: paths: - .github/workflows/copilot-setup-steps.yml jobs: # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. copilot-setup-steps: runs-on: ubuntu-latest # Timeout after 30 minutes (maximum is 59) timeout-minutes: 30 # You can define any steps you want, and they will run before the agent starts. # If you do not check out your code, Copilot will do this for you. steps: - name: Checkout code uses: actions/checkout@v6 - name: Set up Python 3.11 uses: actions/setup-python@v6 with: python-version: '3.11' - name: Cache pip packages uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-copilot-${{ hashFiles('**/pyproject.toml') }} restore-keys: | ${{ runner.os }}-pip-copilot- ${{ runner.os }}-pip- - name: Install Python dependencies run: | python -m pip install --upgrade pip pip install -e ".[api]" pip install pytest pytest-asyncio httpx - name: Create minimal frontend stub for Copilot agent run: | mkdir -p lightrag/api/webui echo 'LightRAG - Copilot Agent

Copilot Agent Mode

' > lightrag/api/webui/index.html echo "Created minimal frontend stub for Copilot agent environment" - name: Verify installation run: | python --version pip list | grep lightrag lightrag-server --help || echo "Note: Server requires .env configuration to run" ================================================ FILE: .github/workflows/docker-build-lite.yml ================================================ name: Build Lite Docker Image on: workflow_dispatch: inputs: _notes_: description: '⚠️ Create lite Docker images only after non-trivial version releases.' required: false type: boolean default: false permissions: contents: read packages: write jobs: build-and-push-lite: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v6 with: fetch-depth: 0 - name: Get latest tag id: get_tag run: | LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "") if [ -z "$LATEST_TAG" ]; then LATEST_TAG="sha-$(git rev-parse --short HEAD)" echo "No tags found, using commit SHA: $LATEST_TAG" else echo "Latest tag found: $LATEST_TAG" fi echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT - name: Prepare lite tag id: lite_tag run: | LITE_TAG="${{ steps.get_tag.outputs.tag }}-lite" echo "Lite image tag: $LITE_TAG" echo "lite_tag=$LITE_TAG" >> $GITHUB_OUTPUT - name: Update version in __init__.py run: | sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_tag.outputs.tag }}\"/" lightrag/__init__.py cat lightrag/__init__.py | grep __version__ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v6 with: images: ghcr.io/${{ github.repository }} tags: | type=raw,value=${{ steps.lite_tag.outputs.lite_tag }} type=raw,value=lite - name: Build and push lite Docker image uses: docker/build-push-action@v7 with: context: . 
file: ./Dockerfile.lite platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=min - name: Output image details run: | echo "Lite Docker image built and pushed successfully!" echo "Image tag: ghcr.io/${{ github.repository }}:${{ steps.lite_tag.outputs.lite_tag }}" echo "Base Git tag used: ${{ steps.get_tag.outputs.tag }}" ================================================ FILE: .github/workflows/docker-build-manual.yml ================================================ name: Build Test Docker Image manually on: workflow_dispatch: inputs: _notes_: description: '⚠️ Please create a new git tag before building the docker image.' required: false type: boolean default: false permissions: contents: read packages: write jobs: build-and-push: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for tags - name: Get latest tag id: get_tag run: | # Get the latest tag, fallback to commit SHA if no tags exist LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "") if [ -z "$LATEST_TAG" ]; then LATEST_TAG="sha-$(git rev-parse --short HEAD)" echo "No tags found, using commit SHA: $LATEST_TAG" else echo "Latest tag found: $LATEST_TAG" fi echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT echo "image_tag=$LATEST_TAG" >> $GITHUB_OUTPUT - name: Update version in __init__.py run: | sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_tag.outputs.tag }}\"/" lightrag/__init__.py echo "Updated __init__.py with version ${{ steps.get_tag.outputs.tag }}" cat lightrag/__init__.py | grep __version__ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v6 with: 
images: ghcr.io/${{ github.repository }} tags: | type=raw,value=${{ steps.get_tag.outputs.tag }} - name: Build and push Docker image uses: docker/build-push-action@v7 with: context: . file: ./Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max - name: Output image details run: | echo "Docker image built and pushed successfully!" echo "Image tags:" echo " - ghcr.io/${{ github.repository }}:${{ steps.get_tag.outputs.tag }}" echo "Latest Git tag used: ${{ steps.get_tag.outputs.tag }}" ================================================ FILE: .github/workflows/docker-publish.yml ================================================ name: Build Latest Docker Image on Release on: release: types: [published] workflow_dispatch: permissions: contents: read packages: write jobs: build-and-push: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for tags - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Get latest tag id: get_tag run: | TAG=$(git describe --tags --abbrev=0) echo "Found tag: $TAG" echo "tag=$TAG" >> $GITHUB_OUTPUT - name: Check if pre-release id: check_prerelease run: | TAG="${{ steps.get_tag.outputs.tag }}" if [[ "$TAG" == *"rc"* ]] || [[ "$TAG" == *"dev"* ]]; then echo "is_prerelease=true" >> $GITHUB_OUTPUT echo "This is a pre-release version: $TAG" else echo "is_prerelease=false" >> $GITHUB_OUTPUT echo "This is a stable release: $TAG" fi - name: Update version in __init__.py run: | sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_tag.outputs.tag }}\"/" lightrag/__init__.py echo "Updated __init__.py with version ${{ steps.get_tag.outputs.tag }}" cat lightrag/__init__.py | 
grep __version__ - name: Extract metadata for Docker id: meta uses: docker/metadata-action@v6 with: images: ghcr.io/${{ github.repository }} tags: | type=raw,value=${{ steps.get_tag.outputs.tag }} type=raw,value=latest,enable=${{ steps.check_prerelease.outputs.is_prerelease == 'false' }} - name: Build and push Docker image uses: docker/build-push-action@v7 with: context: . file: ./Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max ================================================ FILE: .github/workflows/linting.yaml ================================================ name: Linting and Formatting on: push: branches: - main pull_request: branches: - main jobs: lint-and-format: name: Linting and Formatting runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install pre-commit - name: Run pre-commit run: pre-commit run --all-files --show-diff-on-failure ================================================ FILE: .github/workflows/pypi-publish.yml ================================================ name: Upload LightRAG-hku Package on: release: types: [published] workflow_dispatch: permissions: contents: read jobs: release-build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for tags # Build frontend WebUI - name: Setup Bun uses: oven-sh/setup-bun@v2 with: bun-version: latest - name: Build Frontend WebUI run: | cd lightrag_webui bun install --frozen-lockfile bun run build cd .. - name: Verify Frontend Build run: | if [ ! 
-f "lightrag/api/webui/index.html" ]; then echo "❌ Error: Frontend build failed - index.html not found" exit 1 fi echo "✅ Frontend build verified" echo "Frontend files:" ls -lh lightrag/api/webui/ | head -10 - uses: actions/setup-python@v6 with: python-version: "3.x" - name: Get version from tag id: get_version run: | TAG=$(git describe --tags --abbrev=0) echo "Found tag: $TAG" echo "Extracted version: $TAG" echo "version=$TAG" >> $GITHUB_OUTPUT - name: Update version in __init__.py run: | sed -i "s/__version__ = \".*\"/__version__ = \"${{ steps.get_version.outputs.version }}\"/" lightrag/__init__.py echo "Updated __init__.py with version ${{ steps.get_version.outputs.version }}" cat lightrag/__init__.py | grep __version__ - name: Build release distributions run: | python -m pip install build python -m build - name: Upload distributions uses: actions/upload-artifact@v7 with: name: release-dists path: dist/ pypi-publish: runs-on: ubuntu-latest needs: - release-build permissions: id-token: write environment: name: pypi steps: - name: Retrieve release distributions uses: actions/download-artifact@v8 with: name: release-dists path: dist/ - name: Publish release distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: dist/ ================================================ FILE: .github/workflows/stale.yaml ================================================ # .github/workflows/stale.yaml name: Mark stale issues and pull requests on: schedule: - cron: '30 22 * * *' # run at 22:30 UTC (06:30 UTC+08) every day permissions: issues: write pull-requests: write jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v10 with: days-before-stale: 90 # 90 days days-before-close: 7 # 7 days after marked as stale stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
close-issue-message: 'This issue has been automatically closed because it has not had recent activity. Please open a new issue if you still have this problem.' stale-pr-message: 'This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs.' close-pr-message: 'This pull request has been automatically closed because it has not had recent activity.' # If there are specific labels, exempt them from being marked as stale, for example: exempt-issue-labels: 'enhancement,tracked' # exempt-pr-labels: 'bug,enhancement,help wanted' repo-token: ${{ secrets.GITHUB_TOKEN }} # token provided by GitHub ================================================ FILE: .github/workflows/tests.yml ================================================ name: Offline Unit Tests on: push: branches: [ main, dev ] pull_request: branches: [ main, dev ] jobs: offline-tests: name: Offline Tests runs-on: ubuntu-latest strategy: matrix: python-version: ['3.12', '3.14'] steps: - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Cache pip packages uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }} restore-keys: | ${{ runner.os }}-pip- - name: Install dependencies run: | python -m pip install --upgrade pip pip install -e ".[api]" pip install pytest pytest-asyncio - name: Run offline tests run: | # Run only tests marked as 'offline' (no external dependencies) # Integration tests requiring databases/APIs are skipped by default pytest tests/ -m offline -v --tb=short - name: Upload test results if: always() uses: actions/upload-artifact@v7 with: name: test-results-py${{ matrix.python-version }} path: | .pytest_cache/ test-results.xml retention-days: 7 ================================================ FILE: .gitignore 
================================================ # Python-related files __pycache__/ *.py[cod] *.egg-info/ .eggs/ *.tgz *.tar.gz *.ini # Virtual Environment .venv/ venv/ # Environment Variable Files .env .env.backup.* # Generated Docker Compose files (output of setup wizard) docker-compose.*.yml # Build / Distribution dist/ build/ site/ # Logs / Reports *.log *.log.* *.logfire *.coverage/ log/ # Caches .cache/ .mypy_cache/ .pytest_cache/ .ruff_cache/ .gradio/ .history/ temp/ # IDE / Editor Files .idea/ .vscode/ .vscode/settings.json # Framework-specific files local_neo4jWorkDir/ neo4jWorkDir/ # Data & Storage inputs/ output/ rag_storage/ data/ # Evaluation results lightrag/evaluation/results/ # Miscellaneous .DS_Store TODO.md ignore_this.txt *.ignore.* # Project-specific files /dickens*/ /book.txt download_models_hf.py # Frontend build output (built during PyPI release) /lightrag/api/webui/ # temporary test files in project root /test_* # Cline files memory-bank .claude/CLAUDE.md .claude/ # Claude Code CLAUDE.md ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: trailing-whitespace exclude: ^lightrag/api/webui/ - id: end-of-file-fixer exclude: ^lightrag/api/webui/ - id: requirements-txt-fixer exclude: ^lightrag/api/webui/ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.6.4 hooks: - id: ruff-format exclude: ^lightrag/api/webui/ - id: ruff args: [--fix, --ignore=E402] exclude: ^lightrag/api/webui/ - repo: https://github.com/mgedmin/check-manifest rev: "0.49" hooks: - id: check-manifest stages: [manual] exclude: ^lightrag/api/webui/ ================================================ FILE: AGENTS.md ================================================ # Repository Guidelines LightRAG is an advanced Retrieval-Augmented Generation (RAG) framework designed to enhance information retrieval and generation
through graph-based knowledge representation. ## Project Structure & Module Organization - `lightrag/`: Core Python package with orchestrators (`lightrag/lightrag.py`), storage adapters in `kg/`, LLM bindings in `llm/`, and helpers such as `operate.py` and `utils_*.py`. - `lightrag/api/`: FastAPI service (`lightrag_server.py`) with routers under `routers/` and Gunicorn launcher `run_with_gunicorn.py`. - `lightrag_webui/`: React 19 + TypeScript client driven by Bun + Vite; UI components live in `src/`. - `scripts/setup/`: Interactive environment setup wizard. `setup.sh` orchestrates staged `--base` / `--storage` / `--server` / validation flows, `lib/` holds prompt/validation/file helpers, and `templates/*.yml` contains compose fragments for bundled services. - Tests live in `tests/` and root-level `test_*.py`. Working datasets stay in `inputs/`, `rag_storage/`, `temp/`; deployment collateral lives in `docs/`, `k8s-deploy/`, and `docker-compose.yml`. - `Makefile`: Canonical entry point for the setup wizard and local developer shortcuts; prefer documented targets over invoking ad hoc shell snippets. ## Build, Test, and Development Commands - `python -m venv .venv && source .venv/bin/activate`: set up the Python runtime. - `pip install -e .` / `pip install -e .[api]`: install the package and API extras in editable mode. - `make env-base`: first-run interactive setup for LLM, embedding, and reranker configuration; writes `.env` and may generate `docker-compose.final.yml`. - `make env-storage`, `make env-server`: optional follow-up wizard stages for storage backends and server/security/SSL settings; both reuse the existing `.env`. - `make env-validate`, `make env-security-check`, `make env-backup`: validate, audit, or back up the current `.env` via the setup wizard. - `lightrag-server` or `uvicorn lightrag.api.lightrag_server:app --reload`: start the API locally; ensure `.env` is present.
- `python -m pytest tests` (offline markers apply by default) or `python -m pytest tests --run-integration` / `python test_graph_storage.py`: run the full suite, opt into integration coverage, or target an individual script. - `ruff check .`: lint Python sources before committing. - `bun install`, `bun run dev`, `bun run build`, `bun test`: manage the web UI workflow (Bun is mandatory). ## Coding Style & Naming Conventions - Backend code should follow PEP 8 with four-space indentation, annotate functions, and reach for dataclasses when modelling state. - Use `lightrag.utils.logger` instead of `print`; respect logger configuration flags. - Extend storage or pipeline abstractions via `lightrag.base` and keep reusable helpers in the existing `utils_*.py`. - Python modules remain lowercase with underscores; React components use `PascalCase.tsx` and hooks-first patterns. - Front-end code should remain in TypeScript with two-space indentation, rely on functional React components with hooks, and follow Tailwind utility style. ## Testing Guidelines - Keep pytest additions close to the code you touch (`tests/` mirrors feature folders and there are root-level `test_*.py` helpers); functions must start with `test_`. - Follow `tests/pytest.ini`: markers include `offline`, `integration`, `requires_db`, and `requires_api`, and the suite runs with `-m "not integration"` by default—pass `--run-integration` (or set `LIGHTRAG_RUN_INTEGRATION=true`) when external services are available. - Use the custom CLI toggles from `tests/conftest.py`: `--keep-artifacts`/`LIGHTRAG_KEEP_ARTIFACTS=true`, `--stress-test`/`LIGHTRAG_STRESS_TEST=true`, and `--test-workers N`/`LIGHTRAG_TEST_WORKERS` to dial up workloads or preserve temp files during investigations. - Export other required `LIGHTRAG_*` environment variables before running integration or storage tests so adapters can reach configured backends. - For UI updates, pair changes with Vitest specs and run `bun test`.
## Commit & Pull Request Guidelines - Use concise, imperative commit subjects (e.g., `Fix lock key normalization`) and add body context only when necessary. - PRs should include a summary, operational impact, linked issues, and screenshots or API samples for user-facing work. - Verify `ruff check .`, `python -m pytest`, and affected Bun commands succeed before requesting review; note the runs in the PR text. - This repo is a fork of `HKUDS/LightRAG`. Always target **`HKUDS/LightRAG:main`** (upstream) when creating PRs, not the fork's own main. ## Security & Configuration Tips - Copy `env.example` and `config.ini.example`; never commit secrets or real connection strings. - Configure storage backends through `LIGHTRAG_*` variables and validate them with `docker-compose` services when needed. - Treat `lightrag.log*` as local artefacts; purge sensitive information before sharing logs or outputs. ## Automation & Agent Workflow - Use repo-relative `workdir` arguments for every shell command and prefer `rg`/`rg --files` for searches since they are faster under the CLI harness. - Default edits to ASCII, rely on `apply_patch` for single-file changes, and only add concise comments that aid comprehension of complex logic. - Honor existing local modifications; never revert or discard user changes (especially via `git reset --hard`) unless explicitly asked. - Follow the planning tool guidance: skip it for trivial fixes, but provide multi-step plans for non-trivial work and keep the plan updated as steps progress. - Validate changes by running the relevant `ruff`/`pytest`/`bun test` commands whenever feasible, and describe any unrun checks with follow-up guidance. - For Codex and other fresh-shell automation, prefer `./scripts/test.sh` instead of bare `pytest`; the script falls back through `PYTHON`, the active virtualenv, `uv`, `.venv`, and `venv` before trying `python` or `python3`.
- For setup workflow changes, prefer `make env-*` targets over calling `scripts/setup/setup.sh` directly; the `Makefile` resolves a Bash 4+ interpreter for macOS/Linux compatibility. - When editing setup logic, keep `.env` host-usable and treat `docker-compose.final.yml` as generated output assembled from `scripts/setup/templates/*.yml`; compose-only overrides belong in the wizard-managed compose layer rather than being persisted back into `.env`. ================================================ FILE: Dockerfile ================================================ # syntax=docker/dockerfile:1 # Frontend build stage # Build frontend assets on the native build platform to avoid # cross-architecture emulation issues during multi-platform builds. FROM --platform=$BUILDPLATFORM oven/bun:1 AS frontend-builder WORKDIR /app # Copy frontend source code COPY lightrag_webui/ ./lightrag_webui/ # Build frontend assets for inclusion in the API package RUN --mount=type=cache,target=/root/.bun/install/cache \ cd lightrag_webui \ && bun install --frozen-lockfile \ && bun run build # Python build stage - using uv for faster package installation FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder ENV DEBIAN_FRONTEND=noninteractive ENV UV_SYSTEM_PYTHON=1 ENV UV_COMPILE_BYTECODE=1 WORKDIR /app # Install system deps (Rust is required by some wheels) RUN apt-get update \ && apt-get install -y --no-install-recommends \ curl \ build-essential \ pkg-config \ && rm -rf /var/lib/apt/lists/* \ && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}" # Ensure shared data directory exists for uv caches RUN mkdir -p /root/.local/share/uv # Copy project metadata and sources COPY pyproject.toml . COPY setup.py . COPY uv.lock . 
# Install base, API, and offline extras without the project to improve caching RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable # Copy project sources after dependency layer COPY lightrag/ ./lightrag/ # Include pre-built frontend assets from the previous stage COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui # Sync project in non-editable mode and ensure pip is available for runtime installs RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --extra offline --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Prepare offline cache directory and pre-populate tiktoken data # Use uv run to execute commands from the virtual environment RUN mkdir -p /app/data/tiktoken \ && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \ if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi # Final stage FROM python:3.12-slim WORKDIR /app # Install uv for package management COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv ENV UV_SYSTEM_PYTHON=1 # Copy installed packages and application code COPY --from=builder /root/.local /root/.local COPY --from=builder /app/.venv /app/.venv COPY --from=builder /app/lightrag ./lightrag COPY pyproject.toml . COPY setup.py . COPY uv.lock . 
# Ensure the installed scripts are on PATH ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH # Install dependencies with uv sync (uses locked versions from uv.lock) # And ensure pip is available for runtime installs RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --extra offline --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Create persistent data directories AFTER package installation RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken # Copy offline cache into the newly created directory COPY --from=builder /app/data/tiktoken /app/data/tiktoken # Point to the prepared cache ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken ENV WORKING_DIR=/app/data/rag_storage ENV INPUT_DIR=/app/data/inputs # Expose API port EXPOSE 9621 ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"] ================================================ FILE: Dockerfile.lite ================================================ # syntax=docker/dockerfile:1 # Frontend build stage # Build frontend assets on the native build platform to avoid # cross-architecture emulation issues during multi-platform builds. 
FROM --platform=$BUILDPLATFORM oven/bun:1 AS frontend-builder WORKDIR /app # Copy frontend source code COPY lightrag_webui/ ./lightrag_webui/ # Build frontend assets for inclusion in the API package RUN --mount=type=cache,target=/root/.bun/install/cache \ cd lightrag_webui \ && bun install --frozen-lockfile \ && bun run build # Python build stage - using uv for package installation FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder ENV DEBIAN_FRONTEND=noninteractive ENV UV_SYSTEM_PYTHON=1 ENV UV_COMPILE_BYTECODE=1 WORKDIR /app # Install system dependencies required by some wheels RUN apt-get update \ && apt-get install -y --no-install-recommends \ curl \ build-essential \ pkg-config \ && rm -rf /var/lib/apt/lists/* \ && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}" # Ensure shared data directory exists for uv caches RUN mkdir -p /root/.local/share/uv # Copy project metadata and sources COPY pyproject.toml . COPY setup.py . COPY uv.lock . 
# Install project dependencies (base + API extras) without the project to improve caching RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --no-install-project --no-editable # Copy project sources after dependency layer COPY lightrag/ ./lightrag/ # Include pre-built frontend assets from the previous stage COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui # Sync project in non-editable mode and ensure pip is available for runtime installs RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Prepare tiktoken cache directory and pre-populate tokenizer data # Ignore exit code 2 which indicates assets already cached RUN mkdir -p /app/data/tiktoken \ && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \ if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi # Final stage FROM python:3.12-slim WORKDIR /app # Install uv for package management COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv ENV UV_SYSTEM_PYTHON=1 # Copy installed packages and application code COPY --from=builder /root/.local /root/.local COPY --from=builder /app/.venv /app/.venv COPY --from=builder /app/lightrag ./lightrag COPY pyproject.toml . COPY setup.py . COPY uv.lock . 
# Ensure the installed scripts are on PATH ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH # Sync dependencies inside the final image using uv # And ensure pip is available for runtime installs RUN --mount=type=cache,target=/root/.local/share/uv \ uv sync --frozen --no-dev --extra api --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Create persistent data directories RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken # Copy cached tokenizer assets prepared in the builder stage COPY --from=builder /app/data/tiktoken /app/data/tiktoken # Docker data directories ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken ENV WORKING_DIR=/app/data/rag_storage ENV INPUT_DIR=/app/data/inputs # Expose API port EXPOSE 9621 # Set entrypoint ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2025 LightRAG Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: MANIFEST.in ================================================ include requirements.txt include lightrag/api/requirements.txt recursive-include lightrag/api/webui * recursive-include lightrag/api/static * ================================================ FILE: Makefile ================================================ SHELL := /bin/bash SETUP_SCRIPT := scripts/setup/setup.sh SETUP_BASH ?= $(or $(firstword $(wildcard /opt/homebrew/bin/bash /usr/local/bin/bash /opt/local/bin/bash)),$(shell command -v bash 2>/dev/null),bash) SETUP_OPTS ?= COLOR_RESET := \033[0m COLOR_BOLD := \033[1m COLOR_BLUE := \033[34m COLOR_GREEN := \033[32m COLOR_YELLOW := \033[33m ifeq ($(NO_COLOR),1) COLOR_RESET := COLOR_BOLD := COLOR_BLUE := COLOR_GREEN := COLOR_YELLOW := endif .PHONY: help configure env-base env-storage env-server env-validate env-backup env-security-check env-base-rewrite env-storage-rewrite env base storage server validate backup security security-check base-rewrite storage-rewrite help: @printf "$(COLOR_BOLD)Interactive setup targets$(COLOR_RESET)\n" @printf " $(COLOR_GREEN)make env-base$(COLOR_RESET) Configure LLM, embedding, and reranker (run first)\n" @printf " $(COLOR_GREEN)make env-storage$(COLOR_RESET) Configure storage backends and databases\n" @printf " $(COLOR_GREEN)make env-server$(COLOR_RESET) Configure server, security, and SSL\n" @printf " $(COLOR_GREEN)make env-validate$(COLOR_RESET) Validate existing .env\n" @printf " $(COLOR_GREEN)make env-security-check$(COLOR_RESET) Audit existing .env for security risks\n" @printf " $(COLOR_GREEN)make env-backup$(COLOR_RESET) Backup current .env\n" @printf " $(COLOR_GREEN)make env-base-rewrite$(COLOR_RESET) Force-regenerate wizard-managed compose services during base setup\n" @printf " $(COLOR_GREEN)make env-storage-rewrite$(COLOR_RESET) Force-regenerate wizard-managed compose services during storage setup\n" @printf " $(COLOR_GREEN)make base$(COLOR_RESET) Short form of 
make env-base (all env prefix can be stripped)\n" @printf "\n" @printf "$(COLOR_BOLD)Typical workflow$(COLOR_RESET)\n" @printf " 1. make env-base # set LLM/embedding/reranker\n" @printf " 2. make env-storage # set storage backends (optional)\n" @printf " 3. make env-server # set port/security/SSL (optional)\n\n" @printf "$(COLOR_BOLD)Examples$(COLOR_RESET)\n" @printf " make env-base\n" @printf " make env-storage SETUP_OPTS=--debug\n" @printf " make env-server\n\n" @printf " make env-storage-rewrite\n\n" @printf " make env-security-check\n\n" @printf "$(COLOR_BOLD)Compose Output$(COLOR_RESET)\n" @printf " Bundled service images are defined in scripts/setup/templates/*.yml.\n" @printf " Compose file output: docker-compose.final.yml\n" env-base env base configure: @$(SETUP_BASH) $(SETUP_SCRIPT) --base $(SETUP_OPTS) env-storage storage: @$(SETUP_BASH) $(SETUP_SCRIPT) --storage $(SETUP_OPTS) env-base-rewrite base-rewrite: @$(SETUP_BASH) $(SETUP_SCRIPT) --base --rewrite-compose $(SETUP_OPTS) env-storage-rewrite storage-rewrite: @$(SETUP_BASH) $(SETUP_SCRIPT) --storage --rewrite-compose $(SETUP_OPTS) env-server server: @$(SETUP_BASH) $(SETUP_SCRIPT) --server $(SETUP_OPTS) env-validate validate: @$(SETUP_BASH) $(SETUP_SCRIPT) --validate $(SETUP_OPTS) env-security-check security security-check: @$(SETUP_BASH) $(SETUP_SCRIPT) --security-check $(SETUP_OPTS) env-backup backup: @$(SETUP_BASH) $(SETUP_SCRIPT) --backup $(SETUP_OPTS) ================================================ FILE: README-zh.md ================================================
LightRAG Logo
# 🚀 LightRAG: 简单且快速的检索增强生成(RAG)框架
HKUDS%2FLightRAG | Trendshift

LightRAG Diagram
---
LiteWrite
--- ## 🎉 新闻 - [2025.11]🎯[新功能]: 集成了 **RAGAS 评估**和 **Langfuse 追踪**。更新了 API 以在查询结果中返回召回上下文,支持上下文精度指标。 - [2025.10]🎯[可扩展性增强]: 消除了处理瓶颈,以高效支持**大规模数据集**。 - [2025.09]🎯[新功能]: 显著提升了 Qwen3-30B-A3B 等**开源 LLM** 的知识图谱提取准确性。 - [2025.08]🎯[新功能]: 现已支持 **Reranker**,显著提升混合查询性能(已设为默认查询模式)。 - [2025.08]🎯[新功能]: 添加了**文档删除**功能,并支持自动重新生成知识图谱,以确保最佳查询性能。 - [2025.06]🎯[新发布]: 我们的团队发布了 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) —— 一个用于无缝处理文本、图像、表格和方程式的**全功能多模态 RAG** 系统。 - [2025.06]🎯[新功能]: LightRAG 现已集成 [RAG-Anything](https://github.com/HKUDS/RAG-Anything),支持全面的多模态数据处理,实现对 PDF、图像、Office 文档、表格和公式等多种格式的无缝文档解析和 RAG 能力。详见[多模态文档处理部分](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration)。 - [2025.03]🎯[新功能]: LightRAG 现已支持引用功能,实现了准确的源归因和增强的文档可追溯性。 - [2025.02]🎯[新功能]: 现在您可以使用 MongoDB 作为一体化存储解决方案,实现统一的数据管理。 - [2025.02]🎯[新发布]: 我们的团队发布了 [VideoRAG](https://github.com/HKUDS/VideoRAG) —— 一个用于理解超长上下文视频的 RAG 系统。 - [2025.01]🎯[新发布]: 我们的团队发布了 [MiniRAG](https://github.com/HKUDS/MiniRAG),使用小型模型简化 RAG。 - [2025.01]🎯现在您可以使用 PostgreSQL 作为一体化存储解决方案进行数据管理。 - [2024.11]🎯[新资源]: LightRAG 的综合指南现已在 [LearnOpenCV](https://learnopencv.com/lightrag) 上发布 —— 探索深入的教程和最佳实践。非常感谢博客作者的杰出贡献! - [2024.11]🎯[新功能]: 推出 LightRAG WebUI —— 一个允许您通过直观的 Web 界面插入、查询和可视化 LightRAG 知识的仪表板。 - [2024.11]🎯[新功能]: 现在您可以[使用 Neo4J 进行存储](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) —— 开启图数据库支持。 - [2024.10]🎯[新功能]: 我们添加了 [LightRAG 介绍视频](https://youtu.be/oageL-1I0GE) 的链接 —— 演示 LightRAG 的各项功能。感谢作者的杰出贡献! - [2024.10]🎯[新频道]: 我们创建了一个 [Discord 频道](https://discord.gg/yF2MmDJyGJ)!💬 欢迎加入我们的社区进行分享、讨论和协作! 🎉🎉 - [2024.10]🎯[新功能]: LightRAG 现在支持 [Ollama 模型](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
算法流程图 ![LightRAG索引流程图](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg) *图1:LightRAG索引流程图 - 图片来源:[Source](https://learnopencv.com/lightrag/)* ![LightRAG检索和查询流程图](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg) *图2:LightRAG检索和查询流程图 - 图片来源:[Source](https://learnopencv.com/lightrag/)*
## 安装 > **💡 使用 uv 进行包管理**: 本项目使用 [uv](https://docs.astral.sh/uv/) 进行快速可靠的 Python 包管理。 > 首先安装 uv: `curl -LsSf https://astral.sh/uv/install.sh | sh` (Unix/macOS) 或 `powershell -c "irm https://astral.sh/uv/install.ps1 | iex"` (Windows) > > **注意**:如果您愿意,也可以使用 pip,但为了获得更好的性能和更可靠的依赖管理,建议使用 uv。 > > **📦 离线部署**: 对于离线或隔离环境,请参阅[离线部署指南](./docs/OfflineDeployment.md),了解预安装所有依赖项和缓存文件的说明。 ### 安装LightRAG服务器 LightRAG服务器旨在提供Web UI和API支持。Web UI便于文档索引、知识图谱探索和简单的RAG查询界面。LightRAG服务器还提供兼容Ollama的接口,旨在将LightRAG模拟为Ollama聊天模型。这使得AI聊天机器人(如Open WebUI)可以轻松访问LightRAG。 * 从PyPI安装 ```bash ### 使用 uv 安装 LightRAG 服务器(作为工具,推荐) uv tool install "lightrag-hku[api]" ### 或使用 pip # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install "lightrag-hku[api]" ### 构建前端代码 cd lightrag_webui bun install --frozen-lockfile bun run build cd .. # 配置 env 文件 # 从 GitHub 仓库的根目录上下载 env.example 文件 # 或从本地检出的源代码中获取 env.example 文件 cp env.example .env # 使用你的LLM和Embedding模型访问参数更新.env文件 # 启动API-WebUI服务 lightrag-server ``` * 从源代码安装 ```bash git clone https://github.com/HKUDS/LightRAG.git cd LightRAG # 使用 uv (推荐) # 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境 uv sync --extra api source .venv/bin/activate # 激活虚拟环境 (Linux/macOS) # Windows 系统: .venv\Scripts\activate ### 或使用 pip 和虚拟环境 # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install -e ".[api]" # 构建前端代码 cd lightrag_webui bun install --frozen-lockfile bun run build cd ..
# 配置 env 文件 cp env.example .env # 使用你的LLM和Embedding模型访问参数更新.env文件 # 启动API-WebUI服务 lightrag-server ``` * 使用 Docker Compose 启动 LightRAG 服务器 ```bash git clone https://github.com/HKUDS/LightRAG.git cd LightRAG cp env.example .env # 使用你的LLM和Embedding模型访问参数更新.env文件 # modify LLM and Embedding settings in .env docker compose up ``` > 在此获取LightRAG docker镜像历史版本: [LightRAG Docker Images]( https://github.com/HKUDS/LightRAG/pkgs/container/lightrag) ### 使用 Setup 工具创建 .env 文件 除了手动编辑 `env.example` 之外,您还可以使用交互式向导生成配置好的 `.env`,并在需要时生成 `docker-compose.final.yml`: ```bash make env-base # 必跑第一步:配置 LLM、Embedding、Reranker make env-storage # 可选:配置存储后端和数据库服务 make env-server # 可选:配置服务端口、鉴权和 SSL make env-base-rewrite # 可选:强制重建向导托管的 compose 服务块 make env-storage-rewrite # 可选:强制重建向导托管的 compose 服务块 make env-security-check # 可选:审计当前 .env 中的安全风险 ``` 每个目标的详细说明请参阅 [docs/InteractiveSetup.md](./docs/InteractiveSetup.md)。 这些 setup 向导只负责更新配置;如需在部署前审计当前 `.env` 的安全风险,请额外运行 `make env-security-check`。 默认情况下,重新运行 setup 会保留未变化的向导托管 compose 服务块;只有在需要按模板强制重建这些托管块时,才使用 `*-rewrite` 目标。 ### 安装LightRAG Core * 从源代码安装(推荐) ```bash cd LightRAG # 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境 uv sync source .venv/bin/activate # 激活虚拟环境 (Linux/macOS) # Windows 系统: .venv\Scripts\activate # 或: pip install -e . 
``` * 从PyPI安装 ```bash uv pip install lightrag-hku # 或: pip install lightrag-hku ``` ## 快速开始 ### LightRAG的LLM及配套技术栈要求 LightRAG对大型语言模型(LLM)的能力要求远高于传统RAG,因为它需要LLM执行文档中的实体关系抽取任务。配置合适的Embedding和Reranker模型对提高查询表现也至关重要。 - **LLM选型**: - 推荐选用参数量至少为32B的LLM。 - 上下文长度至少为32KB,推荐达到64KB。 - 在文档索引阶段不建议选择推理模型。 - 在查询阶段建议选择比索引阶段能力更强的模型,以达到更高的查询效果。 - **Embedding模型**: - 高性能的Embedding模型对RAG至关重要。 - 推荐使用主流的多语言Embedding模型,例如:BAAI/bge-m3 和 text-embedding-3-large。 - **重要提示**:在文档索引前必须确定使用的Embedding模型,且在文档查询阶段必须沿用与索引阶段相同的模型。有些存储(例如PostgreSQL)在首次建立数据表的时候需要确定向量维度,因此更换Embedding模型后需要删除向量相关库表,以便让LightRAG重建新的库表。 - **Reranker模型配置**: - 配置Reranker模型能够显著提升LightRAG的检索效果。 - 启用Reranker模型后,推荐将“mix模式”设为默认查询模式。 - 推荐选用主流的Reranker模型,例如:BAAI/bge-reranker-v2-m3 或 Jina 等服务商提供的模型。 ### 使用LightRAG服务器 **有关LightRAG服务器的更多信息,请参阅[LightRAG服务器](./lightrag/api/README.md)。** ### 使用LightRAG Core LightRAG核心功能的示例代码请参见`examples`目录。您还可参照[视频](https://www.youtube.com/watch?v=g21royNJ4fw)完成环境配置。若已持有OpenAI API密钥,可以通过以下命令运行演示代码: ```bash ### you should run the demo code with project folder cd LightRAG ### provide your API-KEY for OpenAI export OPENAI_API_KEY="sk-...your_opeai_key..."
### download the demo document of "A Christmas Carol" by Charles Dickens curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt ### run the demo code python examples/lightrag_openai_demo.py ``` 如需流式响应示例的实现代码,请参阅 `examples/lightrag_openai_compatible_demo.py`。运行前,请确保根据需求修改示例代码中的LLM及嵌入模型配置。 **注意1**:在运行demo程序的时候需要注意,不同的测试程序可能使用的是不同的embedding模型,更换不同的embedding模型的时候需要清空数据目录(`./dickens`),否则程序执行会出错。如果你想保留LLM缓存,可以在清除数据目录时保留`kv_store_llm_response_cache.json`文件。 **注意2**:官方支持的示例代码仅为 `lightrag_openai_demo.py` 和 `lightrag_openai_compatible_demo.py` 两个文件。其他示例文件均为社区贡献内容,尚未经过完整测试与优化。 ## 使用LightRAG Core进行编程 > ⚠️ **如果您希望将LightRAG集成到您的项目中,建议您使用LightRAG Server提供的REST API**。LightRAG Core通常用于嵌入式应用,或供希望进行研究与评估的学者使用。 ### ⚠️ 重要:初始化要求 LightRAG 在使用前需要显式初始化。 创建 LightRAG 实例后,您必须调用 await rag.initialize_storages(),否则将出现错误。 ### 一个简单程序 以下Python代码片段演示了如何初始化LightRAG、插入文本并进行查询: ```python import os import asyncio from lightrag import LightRAG, QueryParam from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed from lightrag.utils import setup_logger setup_logger("lightrag", level="INFO") WORKING_DIR = "./rag_storage" if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) # IMPORTANT: Both initialization calls are required!
await rag.initialize_storages() # Initialize storage backends return rag async def main(): try: # 初始化RAG实例 rag = await initialize_rag() await rag.ainsert("Your text") # 执行混合检索 mode = "hybrid" print( await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode=mode) ) ) except Exception as e: print(f"发生错误: {e}") finally: if rag: await rag.finalize_storages() if __name__ == "__main__": asyncio.run(main()) ``` 重要说明: - 运行脚本前请先导出你的OPENAI_API_KEY环境变量。 - 该程序使用LightRAG的默认存储设置,所有数据将持久化在WORKING_DIR/rag_storage目录下。 - 该示例仅展示了初始化LightRAG对象的最简单方式:注入embedding和LLM函数,并在创建LightRAG对象后初始化存储和管道状态。 ### LightRAG初始化参数 以下是完整的LightRAG对象初始化参数清单:
参数 | **参数** | **类型** | **说明** | **默认值** | | -------------- | ---------- | ----------------- | ------------- | | **working_dir** | `str` | 存储缓存的目录 | `lightrag_cache+timestamp` | | **workspace** | str | 用于不同 LightRAG 实例之间数据隔离的工作区名称 | | | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage`,`OpenSearchKVStorage` | `JsonKVStorage` | | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage`,`OpenSearchVectorDBStorage` | `NanoVectorDBStorage` | | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage`,`OpenSearchGraphStorage` | `NetworkXStorage` | | **doc_status_storage** | `str` | Storage type for documents process status. Supported types: `JsonDocStatusStorage`,`PGDocStatusStorage`,`MongoDocStatusStorage`,`OpenSearchDocStatusStorage` | `JsonDocStatusStorage` | | **chunk_token_size** | `int` | 拆分文档时每个块的最大令牌大小 | `1200` | | **chunk_overlap_token_size** | `int` | 拆分文档时两个块之间的重叠令牌大小 | `100` | | **tokenizer** | `Tokenizer` | 用于将文本转换为 tokens(数字)以及使用遵循 TokenizerInterface 协议的 .encode() 和 .decode() 函数将 tokens 转换回文本的函数。 如果您不指定,它将使用默认的 Tiktoken tokenizer。 | `TiktokenTokenizer` | | **tiktoken_model_name** | `str` | 如果您使用的是默认的 Tiktoken tokenizer,那么这是要使用的特定 Tiktoken 模型的名称。如果您提供自己的 tokenizer,则忽略此设置。 | `gpt-4o-mini` | | **entity_extract_max_gleaning** | `int` | 实体提取过程中的循环次数,附加历史消息 | `1` | | **node_embedding_algorithm** | `str` | 节点嵌入算法(当前未使用) | `node2vec` | | **node2vec_params** | `dict` | 节点嵌入的参数 | `{"dimensions": 1536,"num_walks": 10,"walk_length": 40,"window_size": 2,"iterations": 3,"random_seed": 3,}` | | **embedding_func** | `EmbeddingFunc` | 从文本生成嵌入向量的函数 | `openai_embed` | | **embedding_batch_num** | `int` | 
嵌入过程的最大批量大小(每批发送多个文本) | `32` | | **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` | | **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` | | **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` | | **summary_context_size** | `int` | 合并实体关系摘要时送给LLM的最大令牌数 | `10000`(由环境变量 SUMMARY_MAX_CONTEXT 设置) | | **summary_max_tokens** | `int` | 合并实体关系描述的最大令牌数长度 | `500`(由环境变量 SUMMARY_MAX_TOKENS 设置) | | **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) | | **llm_model_kwargs** | `dict` | LLM生成的附加参数 | | | **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) | | **enable_llm_cache** | `bool` | 如果为`TRUE`,将LLM结果存储在缓存中;重复的提示返回缓存的响应 | `TRUE` | | **enable_llm_cache_for_entity_extract** | `bool` | 如果为`TRUE`,将实体提取的LLM结果存储在缓存中;适合初学者调试应用程序 | `TRUE` | | **addon_params** | `dict` | 附加参数,例如`{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`:设置示例限制、输出语言和文档处理的批量大小 | `language: English` | | **embedding_cache_config** | `dict` | 问答缓存的配置。包含三个参数:`enabled`:布尔值,启用/禁用缓存查找功能。启用时,系统将在生成新答案之前检查缓存的响应。`similarity_threshold`:浮点值(0-1),相似度阈值。当新问题与缓存问题的相似度超过此阈值时,将直接返回缓存的答案而不调用LLM。`use_llm_check`:布尔值,启用/禁用LLM相似度验证。启用时,在返回缓存答案之前,将使用LLM作为二次检查来验证问题之间的相似度。 | 默认:`{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
### 查询参数 使用QueryParam控制你的查询行为: ```python class QueryParam: """Configuration parameters for query execution in LightRAG.""" mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global" """Specifies the retrieval mode: - "local": Focuses on context-dependent information. - "global": Utilizes global knowledge. - "hybrid": Combines local and global retrieval methods. - "naive": Performs a basic search without advanced techniques. - "mix": Integrates knowledge graph and vector retrieval. """ only_need_context: bool = False """If True, only returns the retrieved context without generating a response.""" only_need_prompt: bool = False """If True, only returns the generated prompt without producing a response.""" response_type: str = "Multiple Paragraphs" """Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'.""" stream: bool = False """If True, enables streaming output for real-time responses.""" top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "20")) """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value.
""" max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "6000")) """Maximum number of tokens allocated for entity context in unified token control system.""" max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "8000")) """Maximum number of tokens allocated for relationship context in unified token control system.""" max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000")) """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" # History messages are only sent to LLM for context, not used for retrieval conversation_history: list[dict[str, str]] = field(default_factory=list) """Stores past conversation history to maintain context. Format: [{"role": "user/assistant", "content": "message"}]. """ # Deprecated (ids filter lead to potential hallucination effects) ids: list[str] | None = None """List of ids to filter the results.""" model_func: Callable[..., object] | None = None """Optional override for the LLM model function to use for this specific query. If provided, this will be used instead of the global model function. This allows using different models for different query modes. """ user_prompt: str | None = None """User-provided prompt for the query. Addition instructions for LLM. If provided, this will be inject into the prompt template. It's purpose is the let user customize the way LLM generate the response. """ enable_rerank: bool = True """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True to enable reranking when rerank model is available. """ ``` > top_k的默认值可以通过环境变量TOP_K更改。 ### LLM and Embedding注入 LightRAG 需要利用LLM和Embedding模型来完成文档索引和知识库查询工作。在初始化LightRAG的阶段,需要把LLM和Embedding的操作函数注入到对象中:
使用类OpenAI的API * LightRAG还支持类OpenAI的聊天/嵌入API: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.openai import openai_complete_if_cache, openai_embed async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await openai_complete_if_cache( "solar-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("UPSTAGE_API_KEY"), base_url="https://api.upstage.ai/v1/solar", **kwargs ) @wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192, model_name="solar-embedding-1-large-query") async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embed.func( texts, model="solar-embedding-1-large-query", api_key=os.getenv("UPSTAGE_API_KEY"), base_url="https://api.upstage.ai/v1/solar" ) async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=embedding_func # 直接传入装饰后的函数 ) await rag.initialize_storages() return rag ``` > **关于嵌入函数封装的重要说明:** > > `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。
使用 Hugging Face 模型 * 如果您想使用 Hugging Face 模型,只需要按如下方式设置 LightRAG: 参见 `lightrag_hf_demo.py` ```python from functools import partial from transformers import AutoTokenizer, AutoModel # Pre-load tokenizer and model tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") # 使用 Hugging Face 模型初始化 LightRAG rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=hf_model_complete, # 使用 Hugging Face 模型进行文本生成 llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Hugging Face 的模型名称 # 使用 Hugging Face 嵌入函数 embedding_func=EmbeddingFunc( embedding_dim=384, max_token_size=2048, model_name="sentence-transformers/all-MiniLM-L6-v2", func=partial( hf_embed.func, # 使用 .func 访问底层未封装的函数 tokenizer=tokenizer, embed_model=embed_model ) ), ) ```
使用Ollama模型 **综述** 如果您想使用Ollama模型,您需要拉取计划使用的模型和嵌入模型,例如`nomic-embed-text`。 然后您只需要按如下方式设置LightRAG: ```python import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed @wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") # Initialize LightRAG with Ollama model rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=ollama_model_complete, # Use Ollama model for text generation llm_model_name='your_model_name', # Your model name embedding_func=embedding_func, # Pass the decorated function directly ) ``` * **增加上下文大小** 为了使 LightRAG 正常工作,上下文大小至少需要 32k tokens。默认情况下,Ollama 模型的上下文大小为 8k。您可以通过以下两种方式之一来实现: * **在 Modelfile 中增加 `num_ctx` 参数** 1. 拉取模型: ```bash ollama pull qwen2 ``` 2. 显示模型文件: ```bash ollama show --modelfile qwen2 > Modelfile ``` 3. 编辑 Modelfile,添加以下行: ```bash PARAMETER num_ctx 32768 ``` 4. 
创建修改后的模型: ```bash ollama create -f Modelfile qwen2m ``` * **通过 Ollama API 设置 `num_ctx`** 您可以使用 `llm_model_kwargs` 参数来配置 Ollama: ```python import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed @wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=ollama_model_complete, # 使用 Ollama 模型进行文本生成 llm_model_name='your_model_name', # 您的模型名称 llm_model_kwargs={"options": {"num_ctx": 32768}}, embedding_func=embedding_func, # 直接传入装饰后的函数 ) ``` > **关于嵌入函数封装的重要说明:** > > `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。 * **低显存 GPU** 如果要在低显存 GPU 上运行此实验,您应该选择较小的模型并调整上下文窗口(增加上下文会增加内存消耗)。例如,在一块改装的 6GB 显存的挖矿 GPU 上运行此 Ollama 示例,需要在使用 `gemma2:2b` 时将上下文大小设置为 26k。它能够在 `book.txt` 中找到 197 个实体和 19 个关系。
LlamaIndex LightRAG 支持与 LlamaIndex 集成(`llm/llama_index_impl.py`): - 通过 LlamaIndex 与 OpenAI 和其他提供商集成 - 详细设置请参阅 [LlamaIndex 文档](https://developers.llamaindex.ai/python/framework/) 或 [示例](examples/unofficial-sample/) **示例用法** ```python # 使用 LlamaIndex 直接访问 OpenAI import asyncio from lightrag import LightRAG from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openai import OpenAI from lightrag.utils import setup_logger # 为 LightRAG 设置日志处理器 setup_logger("lightrag", level="INFO") async def initialize_rag(): rag = LightRAG( working_dir="your/path", llm_model_func=llama_index_complete_if_cache, # 与 LlamaIndex 兼容的补全函数 embedding_func=EmbeddingFunc( # 与 LlamaIndex 兼容的嵌入函数 embedding_dim=1536, max_token_size=2048, model_name=embed_model, func=partial(llama_index_embed.func, embed_model=embed_model) # 使用 .func 访问未封装的原始函数 ), ) await rag.initialize_storages() return rag def main(): # 初始化 RAG 实例 rag = asyncio.run(initialize_rag()) with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) # 执行朴素搜索 print( rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) ) # 执行本地搜索 print( rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) ) # 执行全局搜索 print( rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) ) # 执行混合搜索 print( rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) ) if __name__ == "__main__": main() ``` **详细文档和示例请参阅:** - [LlamaIndex 文档](https://developers.llamaindex.ai/python/framework/) - [直接使用 OpenAI 示例](examples/unofficial-sample/lightrag_llamaindex_direct_demo.py) - [LiteLLM 代理示例](examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py) - [LiteLLM 代理与 Opik 集成示例](examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py)
使用 Azure OpenAI 模型 如果您想使用 Azure OpenAI 模型,您只需要按如下方式设置 LightRAG: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.azure_openai import azure_openai_complete_if_cache, azure_openai_embed # 配置生成模型 async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await azure_openai_complete_if_cache( prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("AZURE_OPENAI_API_KEY"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_version=os.getenv("AZURE_OPENAI_API_VERSION"), deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"), **kwargs ) # 配置嵌入模型 @wrap_embedding_func_with_attrs( embedding_dim=1536, max_token_size=8192, model_name=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL") ) async def embedding_func(texts: list[str]) -> np.ndarray: return await azure_openai_embed.func( texts, api_key=os.getenv("AZURE_OPENAI_API_KEY"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_version=os.getenv("AZURE_OPENAI_API_VERSION"), deployment_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME") ) rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=embedding_func ) ```
使用 Google Gemini 模型 如果您想使用 Google Gemini 模型,您只需要按如下方式设置 LightRAG: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.gemini import gemini_complete, gemini_embed # 配置生成模型 async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await gemini_complete( prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("GEMINI_API_KEY"), model="gemini-1.5-flash", **kwargs ) # 配置嵌入模型 @wrap_embedding_func_with_attrs( embedding_dim=768, max_token_size=2048, model_name="models/text-embedding-004" ) async def embedding_func(texts: list[str]) -> np.ndarray: return await gemini_embed.func( texts, api_key=os.getenv("GEMINI_API_KEY"), model="models/text-embedding-004" ) rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, llm_model_name="gemini-2.0-flash", embedding_func=embedding_func ) ```
### Rerank 函数注入 为了提升检索质量,可以基于更有效的相关性评分模型对文档进行重新排序。`rerank.py` 文件提供了三个 Reranker 服务商的驱动函数: * **Cohere / vLLM**: `cohere_rerank` * **Jina AI**: `jina_rerank` * **阿里云**: `ali_rerank` 您可以将其中一个函数注入到 LightRAG 对象的 `rerank_model_func` 属性中。这将使 LightRAG 的查询函数能够使用注入的函数对检索到的文本块进行重新排序。详细用法请参考 `examples/rerank_example.py` 文件。 ### User Prompt 与 Query 的区别 使用 LightRAG 进行内容查询时,应避免将搜索过程与不相关的输出处理混合在一起,因为这会显著影响查询效果。QueryParam 中的 `user_prompt` 参数专门用于解决此问题 - 它不参与 RAG 检索阶段,而是在查询完成后指导 LLM 如何处理检索到的结果。使用方法如下: ```python # 创建查询参数 query_param = QueryParam( mode = "hybrid", # 其他模式:local, global, hybrid, mix, naive user_prompt = "对于图表,使用 mermaid 格式,节点名称使用英文或拼音,显示标签使用中文", ) # 查询并处理 response_default = rag.query( "请为斯克鲁奇绘制人物关系图", param=query_param ) print(response_default) ``` ### 插入
基本插入 ```python # 基本插入 rag.insert("文本") ```
批量插入 ```python # 基本批量插入:一次插入多个文本 rag.insert(["文本1", "文本2",...]) # 自定义批量大小配置的批量插入 rag = LightRAG( ... working_dir=WORKING_DIR, max_parallel_insert = 4 ) rag.insert(["文本1", "文本2", "文本3", ...]) # 文档将以每批 4 个的方式处理 ``` `max_parallel_insert` 参数决定了文档索引管道中并发处理的文档数量。如果未指定,默认值为 **2**。我们建议将此设置保持在 **10 以下**,因为性能瓶颈通常在于大语言模型(LLM)的处理能力。
带 ID 插入 如果您想为文档提供自定义 ID,文档数量和 ID 数量必须相同。 ```python # 插入单个文本,并为其提供 ID rag.insert("文本1", ids=["文本1的ID"]) # 插入多个文本,并为它们提供 ID rag.insert(["文本1", "文本2",...], ids=["文本1的ID", "文本2的ID"]) ```
使用管道插入 `apipeline_enqueue_documents` 和 `apipeline_process_enqueue_documents` 函数允许您将文档增量插入到图中。这对于希望在后台处理文档同时允许主线程继续执行的场景非常有用。 ```python rag = LightRAG(..) await rag.apipeline_enqueue_documents(input) # 在循环中的例程 await rag.apipeline_process_enqueue_documents(input) ```
多文件类型支持插入 `textract` 支持读取 TXT、DOCX、PPTX、CSV 和 PDF 等文件类型。 ```python import textract file_path = 'TEXT.pdf' text_content = textract.process(file_path) rag.insert(text_content.decode('utf-8')) ```
引用功能 通过提供文件路径,系统可以确保来源可以追溯到原始文档。 ```python # 定义文档及其文件路径 documents = ["文档内容 1", "文档内容 2"] file_paths = ["path/to/doc1.txt", "path/to/doc2.txt"] # 带文件路径插入文档 rag.insert(documents, file_paths=file_paths) ```
### 存储方案 LightRAG 使用 4 种类型的存储来满足不同用途: * KV_STORAGE:LLM 响应缓存、文本块、文档信息 * VECTOR_STORAGE:实体向量、关系向量、文本块向量 * GRAPH_STORAGE:实体关系图 * DOC_STATUS_STORAGE:文档索引状态 每种存储类型都有多种实现: * KV_STORAGE 支持的实现: ``` JsonKVStorage JsonFile(默认) PGKVStorage Postgres RedisKVStorage Redis MongoKVStorage MongoDB OpenSearchKVStorage OpenSearch ``` * GRAPH_STORAGE 支持的实现: ``` NetworkXStorage NetworkX(默认) Neo4JStorage Neo4J PGGraphStorage PostgreSQL with AGE 插件 MemgraphStorage Memgraph OpenSearchGraphStorage OpenSearch ``` > 测试表明,Neo4J 在生产环境中的性能优于带有 AGE 插件的 PostgreSQL。 * VECTOR_STORAGE 支持的实现: ``` NanoVectorDBStorage NanoVector(默认) PGVectorStorage Postgres MilvusVectorDBStorage Milvus FaissVectorDBStorage Faiss QdrantVectorDBStorage Qdrant MongoVectorDBStorage MongoDB OpenSearchVectorDBStorage OpenSearch ``` * DOC_STATUS_STORAGE 支持的实现: ``` JsonDocStatusStorage JsonFile(默认) PGDocStatusStorage Postgres MongoDocStatusStorage MongoDB OpenSearchDocStatusStorage OpenSearch ``` 各存储类型的示例连接配置可在仓库中的 `env.example` 文件里找到。连接字符串中的数据库实例需要您预先在数据库服务器上创建。LightRAG 仅负责在数据库实例中创建表,不负责创建数据库实例本身。如果使用 Redis 作为存储,请记住配置 Redis 的自动数据持久化规则,否则 Redis 服务重启后数据将会丢失。如果使用 PostgreSQL,建议使用 16.6 或更高版本。
使用 Neo4J 存储 * 对于生产级场景,您很可能需要使用企业级解决方案用于知识图谱存储。 * 推荐在 Docker 中运行 Neo4J 进行无缝本地测试。 * 参见:https://hub.docker.com/_/neo4j ```python export NEO4J_URI="neo4j://localhost:7687" export NEO4J_USERNAME="neo4j" export NEO4J_PASSWORD="password" export NEO4J_DATABASE="neo4j" #<----------- 使用 neo4j 社区版 docker 镜像时数据库实例必须为neo4j # 为 LightRAG 设置日志 setup_logger("lightrag", level="INFO") # 启动项目时,请确保通过指定 graph_storage="Neo4JStorage" 来覆盖默认的 KG: NetworkX。 # 使用 Neo4J 实现初始化 LightRAG。 async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=gpt_4o_mini_complete, # 使用 gpt_4o_mini_complete LLM 模型 graph_storage="Neo4JStorage", #<-----------覆盖 KG 默认值 ) # 初始化数据库连接 await rag.initialize_storages() # 初始化文档处理的管道状态 return rag ``` 参见 test_neo4j.py 获取可运行的示例。
使用 PostgreSQL 存储 对于生产级场景,您很可能需要使用企业级解决方案。PostgreSQL 可以为您提供一站式解决方案,作为 KV 存储、VectorDB(pgvector)和 GraphDB(apache AGE)。支持 PostgreSQL 16.6 或更高版本。 * PostgreSQL 很轻量,包含所有必要插件的完整二进制发行版可以压缩到 40MB:参考 [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0),Linux/Mac 也很容易安装。 * 如果您喜欢 docker,建议初学者使用此镜像以避免出现问题(默认用户密码:rag/rag):https://hub.docker.com/r/gzdaniel/postgres-for-rag * 如何开始?参考:[examples/lightrag_gemini_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_gemini_postgres_demo.py) * 对于高性能图数据库需求,推荐使用 Neo4j,因为 Apache AGE 的性能不够理想。
使用 Faiss 存储 在使用 Faiss 向量数据库之前,您必须手动安装 `faiss-cpu` 或 `faiss-gpu`。 - 安装所需依赖: ``` pip install faiss-cpu ``` 如果您有 GPU 支持,也可以安装 `faiss-gpu`。 - 这里我们使用 `sentence-transformers`,但您也可以使用 `3072` 维度的 `OpenAIEmbedding` 模型。 ```python async def embedding_func(texts: list[str]) -> np.ndarray: model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(texts, convert_to_numpy=True) return embeddings # 使用 LLM 模型函数和嵌入函数初始化 LightRAG rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=384, max_token_size=2048, model_name="all-MiniLM-L6-v2", func=embedding_func, ), vector_storage="FaissVectorDBStorage", vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": 0.3 # 您期望的阈值 } ) ```
使用 Memgraph 存储 * Memgraph 是一个高性能的内存图数据库,兼容 Neo4j Bolt 协议。 * 您可以使用 Docker 在本地运行 Memgraph 进行简单测试: * 参见:https://memgraph.com/download ```python export MEMGRAPH_URI="bolt://localhost:7687" # 为 LightRAG 设置日志 setup_logger("lightrag", level="INFO") # 启动项目时,通过指定 graph_storage="MemgraphStorage" 来覆盖默认的 KG: NetworkX。 # 注意:默认设置使用 NetworkX # 使用 Memgraph 实现初始化 LightRAG。 async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=gpt_4o_mini_complete, # 使用 gpt_4o_mini_complete LLM 模型 graph_storage="MemgraphStorage", #<-----------覆盖 KG 默认值 ) # 初始化数据库连接 await rag.initialize_storages() # 初始化文档处理的管道状态 return rag ```
使用 Milvus 作为向量存储 Milvus 是一个高性能、可扩展的向量数据库,适用于生产环境的向量存储。LightRAG 提供了三种配置 Milvus 的方式,并支持可配置的索引类型,以优化性能和内存使用。 ### 支持的索引类型 - `AUTOINDEX`(默认):Milvus 自动选择最佳索引 - `HNSW`:层次可导航小世界图,适用于高召回率 - `HNSW_SQ`:使用标量量化技术的 HNSW,可节省内存(需 Milvus 2.6.8+) - `HNSW_PQ`、`HNSW_PRQ`:使用乘积量化/残差乘积量化技术的 HNSW - `IVF_FLAT`、`IVF_SQ8`、`IVF_PQ`:倒排文件族索引 - `DISKANN`:基于磁盘的近似最近邻索引 - `SCANN`:可扩展的最近邻索引 ### 支持的度量类型 `COSINE` (默认), `L2`, `IP` --- ### 配置方法1 — 环境变量 (`.env` file) 适用于: **LightRAG Server 部署和 Docker/k8s 设置**. ```bash # Connection MILVUS_URI=http://localhost:19530 MILVUS_DB_NAME=lightrag # MILVUS_USER=root # MILVUS_PASSWORD=your_password # MILVUS_TOKEN=your_token # Storage selection LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage # Index configuration (all optional — sensible defaults apply) MILVUS_INDEX_TYPE=HNSW # Default: AUTOINDEX MILVUS_METRIC_TYPE=COSINE # Default: COSINE MILVUS_HNSW_M=16 # Default: 16, range [2-2048] MILVUS_HNSW_EF_CONSTRUCTION=360 # Default: 360 MILVUS_HNSW_EF=200 # Default: 200 # HNSW_SQ options (requires Milvus 2.6.8+) # MILVUS_INDEX_TYPE=HNSW_SQ # MILVUS_HNSW_SQ_TYPE=SQ8 # SQ4U, SQ6, SQ8, BF16, FP16 # MILVUS_HNSW_SQ_REFINE=false # Enable refinement # MILVUS_HNSW_SQ_REFINE_TYPE=FP32 # Refinement precision # MILVUS_HNSW_SQ_REFINE_K=10 # Refinement expansion factor # IVF options # MILVUS_IVF_NLIST=1024 # MILVUS_IVF_NPROBE=16 ``` 然后在 Python 代码中: ```python from lightrag import LightRAG async def initialize_rag(): rag = LightRAG( working_dir="./rag_storage", llm_model_func=..., embedding_func=..., vector_storage="MilvusVectorDBStorage", ) await rag.initialize_storages() return rag ``` ### 配置方案2 — `vector_db_storage_cls_kwargs` (Python SDK) 适用于: **Python SDK / framework integration** (使用代码进行配置) ```python from lightrag import LightRAG async def initialize_rag(): rag = LightRAG( working_dir="./rag_storage", llm_model_func=..., embedding_func=..., vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs={ "milvus_uri": "http://localhost:19530", "milvus_db_name": "lightrag", 
"index_type": "HNSW", "metric_type": "COSINE", "hnsw_m": 16, "hnsw_ef_construction": 360, "hnsw_ef": 200, "cosine_better_than_threshold": 0.2, }, ) await rag.initialize_storages() return rag ``` ### 配置方案3 — `config.ini` (遗留方案) 仅适用于连接参数配置;索引方式配置依然需要使用环境变量或 kwargs. ```ini [milvus] uri = http://localhost:19530 db_name = lightrag # user = root # password = your_password # token = your_token ``` ### 配置优先级 | 配置 | 1st (highest) | 2nd | 3rd (lowest) | |---|---|---|---| | 连接方式 (`uri`, …) | `vector_db_storage_cls_kwargs` | Environment variables | `config.ini` | | 索引方法 (`index_type`, …) | `vector_db_storage_cls_kwargs` | Environment variables | defaults | ### HNSW_SQ 压缩的权衡 | SQ Type | Compression | Precision | Notes | |---|---|---|---| | `SQ4U` | ~8× | Lower | Best memory savings | | `SQ6` | ~5.3× | Balanced | Good trade-off | | `SQ8` | ~4× | Good | **Recommended** | | `BF16` / `FP16` | ~2× | High | Near-lossless | **版本要求:** - HNSW_SQ 索引方式要求 **Milvus 2.6.8 或更高版本** - LightRAG 将自动检查服务的版本并在不符合要求的时候抛出错误 - 其它索引方式要求 Milvus 2.0+ **向后兼容性:** - 现有数据集合不受影响;索引配置仅适用于新创建的集合 - 有关完整的配置选项,请参阅 env.example 和 docs/MilvusConfigurationGuide.md。 完整的配置选项请参考 `env.example` 和 `docs/MilvusConfigurationGuide.md`.
使用 MongoDB 存储 MongoDB 为 LightRAG 提供了一站式存储解决方案。MongoDB 提供原生的 KV 存储和向量存储。LightRAG 使用 MongoDB 集合来实现简单的图存储。`MongoVectorDBStorage` 需要目标 MongoDB 部署具备 Atlas Search / Vector Search 能力,例如 MongoDB Atlas 或 Atlas local。交互式 setup 向导内置的本地 Docker MongoDB 服务是 MongoDB Community Edition,因此它可以用于 KV / 图 / 文档状态存储,但不能作为 `MongoVectorDBStorage` 的后端。
使用 Redis 存储 LightRAG 支持使用 Redis 作为 KV 存储。使用 Redis 存储时,需要注意持久化配置和内存使用配置。以下是推荐的 Redis 配置: ``` save 900 1 save 300 10 save 60 1000 stop-writes-on-bgsave-error yes maxmemory 4gb maxmemory-policy noeviction maxclients 500 ``` 当交互式 setup 管理本地 Redis 容器时,它会在 `./data/config/redis.conf` 生成一个可直接修改的配置文件,并将其挂载到容器内。后续重新运行 setup 时会保留该文件,避免覆盖用户的手工调整。
使用 OpenSearch 存储 OpenSearch 为 LightRAG 的全部四种存储类型(KV、向量、图、文档状态)提供了统一的存储解决方案。它提供原生 k-NN 向量搜索、全文搜索和水平扩展能力,且无云服务限制。 * **环境要求**:OpenSearch 3.x 或更高版本,需启用 k-NN 插件。 使用 Docker 安装 (不含插件): ```bash docker run -d -p 9200:9200 -e "discovery.type=single-node" \ -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=" \ opensearchproject/opensearch:latest ``` 使用 Docker Compose 安装 (推荐,含插件): ```bash curl -O https://raw.githubusercontent.com/opensearch-project/opensearch-build/main/docker/release/dockercomposefiles/docker-compose-3.x.yml # 启动 OpenSearch 集群 OPENSEARCH_INITIAL_ADMIN_PASSWORD= docker-compose -f docker-compose-3.x.yml up -d ``` * **配置**:设置环境变量(完整列表请参见 `env.example`): ```bash export OPENSEARCH_HOSTS=localhost:9200 export OPENSEARCH_USER=admin export OPENSEARCH_PASSWORD= export OPENSEARCH_USE_SSL=true export OPENSEARCH_VERIFY_CERTS=false ``` * **使用方式**: ```python rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=your_llm_func, embedding_func=your_embed_func, kv_storage="OpenSearchKVStorage", doc_status_storage="OpenSearchDocStatusStorage", graph_storage="OpenSearchGraphStorage", vector_storage="OpenSearchVectorDBStorage", ) ``` * **图遍历**:当 OpenSearch SQL 插件支持 PPL 时,图查询会使用 `graphlookup` 命令进行服务端 BFS 遍历以获得最佳性能。否则,将回退到客户端批量 BFS。此功能在启动时自动检测,也可通过 `OPENSEARCH_USE_PPL_GRAPHLOOKUP=true|false` 强制设置。 * **集成测试**:针对实际运行的 OpenSearch 集群进行集成测试: 1. 使用 Docker Compose 启动 OpenSearch(下载 [`docker-compose-3.x.yml`](https://raw.githubusercontent.com/opensearch-project/opensearch-build/main/docker/release/dockercomposefiles/docker-compose-3.x.yml)): ```bash OPENSEARCH_INITIAL_ADMIN_PASSWORD= docker-compose -f docker-compose-3.x.yml up -d ``` 2. 验证集群是否正常运行: ```bash curl -sk -u admin: https://localhost:9200 curl -sk -u admin: https://localhost:9200/_cat/plugins?v ``` 3. 运行单元测试(无需 OpenSearch 实例,使用 mock): ```bash python -m pytest tests/test_opensearch_storage.py -v ``` 4. 
使用实际集群以 OpenSearch 作为存储的演示: ```bash export OPENSEARCH_HOSTS=localhost:9200 export OPENSEARCH_USER=admin export OPENSEARCH_PASSWORD= export OPENSEARCH_USE_SSL=true export OPENSEARCH_VERIFY_CERTS=false python examples/opensearch_storage_demo.py ``` 5. 运行完整的 OpenAI + OpenSearch 示例(需要 `OPENAI_API_KEY`): ```bash export OPENAI_API_KEY=your-api-key python examples/lightrag_openai_opensearch_graph_demo.py ``` 6. 通过 LightRAG WebUI 或独立 HTML 文件可视化知识图谱: 启动 LightRAG 服务器之前,需要[构建前端组件](https://github.com/HKUDS/LightRAG/blob/main/lightrag/api/README.md). ```bash # 带上 OpenSearch 存储的配置,启动 LightRAG 服务器 LIGHTRAG_KV_STORAGE=OpenSearchKVStorage \ LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage \ LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage \ LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage \ LLM_BINDING=openai \ EMBEDDING_BINDING=openai \ EMBEDDING_MODEL=text-embedding-3-large \ EMBEDDING_DIM=3072 \ OPENAI_API_KEY=your-api-key \ lightrag-server # 执行该脚本读取 OpenSearch 存储的数据,生成知识图谱 python examples/graph_visual_with_opensearch.py # 打开 http://localhost:9621/webui/ -> 知识图谱标签 # 或执行该脚本生成独立 HTML 文件 python examples/graph_visual_with_opensearch.py --html ```
### LightRAG 实例之间的数据隔离 `workspace` 参数确保不同 LightRAG 实例之间的数据隔离。一旦初始化,`workspace` 是不可变的,无法更改。以下是不同类型存储实现工作区的方式: - **对于基于本地文件的数据库,通过工作区子目录实现数据隔离**:`JsonKVStorage`、`JsonDocStatusStorage`、`NetworkXStorage`、`NanoVectorDBStorage`、`FaissVectorDBStorage`。 - **对于以集合方式存储数据的数据库,通过在集合名称前添加工作区前缀来实现**:`RedisKVStorage`、`RedisDocStatusStorage`、`MilvusVectorDBStorage`、`MongoKVStorage`、`MongoDocStatusStorage`、`MongoVectorDBStorage`、`MongoGraphStorage`、`PGGraphStorage`。 - **对于 Qdrant 向量数据库,通过基于 payload 的分区实现数据隔离(Qdrant 推荐的多租户方法)**:`QdrantVectorDBStorage` 使用带有 payload 过滤的共享集合,实现无限的工作区可扩展性。 - **对于关系型数据库,通过在表中添加 `workspace` 字段实现逻辑数据分离**:`PGKVStorage`、`PGVectorStorage`、`PGDocStatusStorage`。 - **对于 Neo4j 图数据库,通过标签实现逻辑数据隔离**:`Neo4JStorage` - **对于 OpenSearch,通过索引名称前缀实现数据隔离**:`OpenSearchKVStorage`、`OpenSearchDocStatusStorage`、`OpenSearchGraphStorage`、`OpenSearchVectorDBStorage` 为了保持与旧数据的兼容性,当未配置工作区时,PostgreSQL 非图存储的默认工作区为 `default`,PostgreSQL AGE 图存储的默认工作区为 null,Neo4j 图存储的默认工作区为 `base`。对于所有外部存储,系统提供专用的工作区环境变量来覆盖通用的 `WORKSPACE` 环境变量配置。这些存储特定的工作区环境变量包括:`REDIS_WORKSPACE`、`MILVUS_WORKSPACE`、`QDRANT_WORKSPACE`、`MONGODB_WORKSPACE`、`POSTGRES_WORKSPACE`、`NEO4J_WORKSPACE`、`OPENSEARCH_WORKSPACE`。 **使用示例:** 有关在单个应用程序中管理多个隔离知识库(例如,将"书籍"内容与"人力资源政策"分开)的实际演示,请参阅 [Workspace Demo](examples/lightrag_gemini_workspace_demo.py)。 ### AGENTS.md -- 指导编码代理 AGENTS.md 是一种简单、开放的格式,用于指导编码代理(https://agents.md/)。它是一个专门的、可预测的地方,用于提供上下文和指令,帮助 AI 编码代理在 LightRAG 项目上工作。不同的 AI 编码器不应单独维护各自的指导文件。如果任何 AI 编码器无法自动识别 AGENTS.md,可以使用符号链接作为解决方案。建立符号链接后,可以通过配置本地的 `.gitignore_global` 来防止它们被提交到 Git 仓库。 ## 编辑实体和关系 LightRAG 现在支持全面的知识图谱管理功能,允许您在知识图谱中创建、编辑和删除实体和关系。
创建实体和关系 ```python # 创建新实体 entity = rag.create_entity("Google", { "description": "Google 是一家专注于互联网相关服务和产品的跨国科技公司。", "entity_type": "company" }) # 创建另一个实体 product = rag.create_entity("Gmail", { "description": "Gmail 是 Google 开发的电子邮件服务。", "entity_type": "product" }) # 创建实体之间的关系 relation = rag.create_relation("Google", "Gmail", { "description": "Google 开发和运营 Gmail。", "keywords": "develops operates service", "weight": 2.0 }) ```
手动修改实体与关系 ```python # Edit an existing entity updated_entity = rag.edit_entity("Google", { "description": "Google is a subsidiary of Alphabet Inc., founded in 1998.", "entity_type": "tech_company" }) # Rename an entity (with all its relationships properly migrated) renamed_entity = rag.edit_entity("Gmail", { "entity_name": "Google Mail", "description": "Google Mail (formerly Gmail) is an email service." }) # Edit a relation between entities updated_relation = rag.edit_relation("Google", "Google Mail", { "description": "Google created and maintains Google Mail service.", "keywords": "creates maintains email service", "weight": 3.0 }) ``` 所有操作均提供同步和异步两个版本。异步版本带有 "a" 前缀(例如:`acreate_entity`、`aedit_relation`)。
插入自定义知识图谱 ```python custom_kg = { "chunks": [ { "content": "Alice and Bob are collaborating on quantum computing research.", "source_id": "doc-1", "file_path": "test_file", } ], "entities": [ { "entity_name": "Alice", "entity_type": "person", "description": "Alice is a researcher specializing in quantum physics.", "source_id": "doc-1", "file_path": "test_file" }, { "entity_name": "Bob", "entity_type": "person", "description": "Bob is a mathematician.", "source_id": "doc-1", "file_path": "test_file" }, { "entity_name": "Quantum Computing", "entity_type": "technology", "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", "source_id": "doc-1", "file_path": "test_file" } ], "relationships": [ { "src_id": "Alice", "tgt_id": "Bob", "description": "Alice and Bob are research partners.", "keywords": "collaboration research", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" }, { "src_id": "Alice", "tgt_id": "Quantum Computing", "description": "Alice conducts research on quantum computing.", "keywords": "research expertise", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" }, { "src_id": "Bob", "tgt_id": "Quantum Computing", "description": "Bob researches quantum computing.", "keywords": "research application", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" } ] } rag.insert_custom_kg(custom_kg) ```
其它实体与关系操作 - **create_entity**:创建具有指定属性的新实体 - **edit_entity**:更新现有实体的属性或重命名它 - **create_relation**:在现有实体之间创建新关系 - **edit_relation**:更新现有关系的属性 这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。
## 删除功能 LightRAG 提供了全面的删除能力,允许您删除文档、实体和关系。
删除实体 您可以通过实体名称删除实体及其所有关联关系: ```python # 删除实体及其所有关系(同步版本) rag.delete_by_entity("Google") # 异步版本 await rag.adelete_by_entity("Google") ``` 删除实体时: - 从知识图谱中移除该实体节点 - 删除所有关联的关系 - 从向量数据库中移除相关的嵌入向量 - 保持知识图谱的完整性
删除关系 您可以删除两个特定实体之间的关系: ```python # 删除两个实体之间的关系(同步版本) rag.delete_by_relation("Google", "Gmail") # 异步版本 await rag.adelete_by_relation("Google", "Gmail") ``` 删除关系时: - 移除指定的关系边 - 从向量数据库中删除该关系的嵌入向量 - 保留实体节点及其它关系
通过文档 ID 删除 您可以通过文档 ID 删除整个文档及其所有相关的知识: ```python # 通过文档 ID 删除(异步版本) await rag.adelete_by_doc_id("doc-12345") ``` 通过文档 ID 删除时的优化处理: - **智能清理**:自动识别并删除仅属于该文档的实体和关系 - **保留共享知识**:如果实体或关系在其他文档中也存在,则会保留并重新构建其描述 - **缓存优化**:清理相关的 LLM 缓存以减少存储开销 - **增量重建**:从剩余文档中重新构建受影响的实体和关系描述 删除过程包括: 1. 删除与该文档相关的所有文本块 2. 识别并删除仅属于该文档的实体和关系 3. 重新构建在其他文档中仍存在的实体和关系 4. 更新所有相关的向量索引 5. 清理文档状态记录 注意:由于涉及复杂的知识图谱重构过程,通过文档 ID 删除是一个异步操作。
**重要提醒:** 1. **不可逆操作**:所有删除操作都是不可逆的,请谨慎使用 2. **性能考虑**:删除大量数据可能需要一些时间,特别是通过文档 ID 删除 3. **数据一致性**:删除操作会自动维护知识图谱与向量数据库之间的一致性 4. **备份建议**:在执行重要删除操作前,请考虑备份数据 **批量删除建议:** - 对于批量删除操作,建议使用异步方法以获得更好的性能 - 对于大规模删除,建议分批处理以避免系统负载过高 ## 实体合并
合并实体及其关系 LightRAG 现在支持将多个实体合并为单个实体,并自动处理所有关系: ```python # 基础实体合并 rag.merge_entities( source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"], target_entity="AI Technology" ) ``` 使用自定义合并策略: ```python # 为不同字段定义自定义合并策略 rag.merge_entities( source_entities=["John Smith", "Dr. Smith", "J. Smith"], target_entity="John Smith", merge_strategy={ "description": "concatenate", # 合并所有描述 "entity_type": "keep_first", # 保留第一个实体的类型 "source_id": "join_unique" # 合并所有唯一的源 ID } ) ``` 使用自定义目标实体数据: ```python # 为合并后的实体指定精确值 rag.merge_entities( source_entities=["New York", "NYC", "Big Apple"], target_entity="New York City", target_entity_data={ "entity_type": "LOCATION", "description": "New York City is the most populous city in the United States.", } ) ``` 结合上述两种方式的高级用法: ```python # 合并公司实体,同时使用策略和自定义数据 rag.merge_entities( source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"], target_entity="Microsoft", merge_strategy={ "description": "concatenate", # 合并所有描述 "source_id": "join_unique" # 合并源 ID }, target_entity_data={ "entity_type": "ORGANIZATION", } ) ``` 合并实体时: * 所有来自源实体的关系都会重定向到目标实体 * 重复的关系会被智能合并 * 防止出现自我指向的关系(自环) * 合并完成后源实体会被移除 * 关系权重和属性会被保留
## 多模态文档处理(RAG-Anything 集成) LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 无缝集成,这是一个专门为 LightRAG 构建的**全能多模态文档处理 RAG 系统**。RAG-Anything 能够实现先进的解析和检索增强生成(RAG)能力,允许您无缝处理多模态文档,并从各种文档格式中提取结构化内容——包括文本、图像、表格和公式——以集成到您的 RAG 流程中。 **核心特性:** - **端到端多模态流程**:从文档摄取解析到智能多模态问答的完整工作流程 - **通用文档支持**:无缝处理 PDF、Office 文档(DOC/DOCX/PPT/PPTX/XLS/XLSX)、图像及多种文件格式 - **专业内容分析**:针对图像、表格、数学公式及异构内容类型的专用处理器 - **多模态知识图谱**:自动实体提取和跨模态关系发现,增强理解力 - **混合智能检索**:跨越文本和多模态内容的高级搜索能力,具备上下文理解 **快速开始:** 1. 安装 RAG-Anything: ```bash pip install raganything ``` 2. 处理多模态文档:
RAGAnything 使用示例 ```python import asyncio from raganything import RAGAnything from lightrag import LightRAG from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc import os async def load_existing_lightrag(): # 首先,创建或加载一个现有的 LightRAG 实例 lightrag_working_dir = "./existing_lightrag_storage" # 检查先前的 LightRAG 实例是否存在 if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): print("✅ Found existing LightRAG instance, loading...") else: print("❌ No existing LightRAG instance found, will create new one") from functools import partial # 使用您的配置创建/加载 LightRAG 实例 lightrag_instance = LightRAG( working_dir=lightrag_working_dir, llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key="your-api-key", **kwargs, ), embedding_func=EmbeddingFunc( embedding_dim=3072, max_token_size=8192, model="text-embedding-3-large", func=partial( openai_embed.func, # 使用 .func 访问未封装的原始函数 model="text-embedding-3-large", api_key=api_key, base_url=base_url, ), ) ) # 初始化存储(这将加载现有数据,如果有的话) await lightrag_instance.initialize_storages() # 现在使用现有的 LightRAG 实例初始化 RAGAnything rag = RAGAnything( lightrag=lightrag_instance, # 传入现有的 LightRAG 实例 # 仅在多模态处理时需要视觉模型 vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache( "gpt-4o", "", system_prompt=None, history_messages=[], messages=[ {"role": "system", "content": system_prompt} if system_prompt else None, {"role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}} ]} if image_data else {"role": "user", "content": prompt} ], api_key="your-api-key", **kwargs, ) if image_data else openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, 
api_key="your-api-key", **kwargs, ) # 注意:working_dir, llm_model_func, embedding_func 等都继承自 lightrag_instance ) # 查询现有的知识库 result = await rag.query_with_multimodal( "What data has been processed in this LightRAG instance?", mode="hybrid" ) print("Query result:", result) # 向现有的 LightRAG 实例添加新的多模态文档 await rag.process_document_complete( file_path="path/to/new/multimodal_document.pdf", output_dir="./output" ) if __name__ == "__main__": asyncio.run(load_existing_lightrag()) ```
有关详细文档和高级用法,请参考 [RAG-Anything 仓库](https://github.com/HKUDS/RAG-Anything)。 ## Token 使用量跟踪
概览与用法 LightRAG 提供了一个 TokenTracker 工具,用于监控和管理大语言模型的 token 消耗情况。此功能对于控制 API 成本和优化性能非常有用。 ### 用法 ```python from lightrag.utils import TokenTracker # 创建 TokenTracker 实例 token_tracker = TokenTracker() # 方法 1:使用上下文管理器(推荐) # 适用于需要自动跟踪 token 使用量的场景 with token_tracker: result1 = await llm_model_func("your question 1") result2 = await llm_model_func("your question 2") # 方法 2:手动添加 token 使用记录 # 适用于需要更精细控制 token 统计的场景 token_tracker.reset() rag.insert() rag.query("your question 1", param=QueryParam(mode="naive")) rag.query("your question 2", param=QueryParam(mode="mix")) # 显示总 token 使用量(包括插入和查询操作) print("Token usage:", token_tracker.get_usage()) ``` ### 使用技巧 - 在长会话或批量操作中使用上下文管理器,自动跟踪所有 token 消耗 - 对于需要分段统计的场景,使用手动模式并在适当时候调用 reset() - 定期检查 token 使用量有助于及早发现异常消耗 - 在开发和测试过程中积极使用此功能,以优化生产成本 ### 实践案例 您可以参考以下示例来实施 token 跟踪: - `examples/lightrag_gemini_track_token_demo.py`:使用 Google Gemini 模型的 token 跟踪示例 - `examples/lightrag_siliconcloud_track_token_demo.py`:使用 SiliconCloud 模型的 token 跟踪示例 这些示例展示了如何在不同模型和场景下有效地使用 TokenTracker 功能。
## 数据导出功能 ### 概览 LightRAG 允许您以各种格式导出知识图谱数据,用于分析、共享和备份。系统支持导出实体、关系及关系数据。 ### 导出函数
基础用法 ```python # 基础 CSV 导出(默认格式) rag.export_data("knowledge_graph.csv") # 指定任意格式 rag.export_data("output.xlsx", file_format="excel") ```
支持的不同文件格式 ```python # 以 CSV 格式导出数据 rag.export_data("graph_data.csv", file_format="csv") # 导出到 Excel 工作表 rag.export_data("graph_data.xlsx", file_format="excel") # 以 markdown 格式导出数据 rag.export_data("graph_data.md", file_format="md") # 导出为纯文本 rag.export_data("graph_data.txt", file_format="txt") ```
附加选项 在导出中包含向量嵌入(可选): ```python rag.export_data("complete_data.csv", include_vector_data=True) ```
### 导出中包含的数据 所有导出均包含: * 实体信息(名称、ID、元数据) * 关系数据(实体间的连接) * 来自向量数据库的关系信息 ## 缓存
清除缓存 您可以使用 `aclear_cache()` 清空当前配置的 LLM 响应缓存存储。该 API 会清除 `llm_response_cache` 中的全部缓存项,不支持按模式或缓存类型进行选择性清理。 ```python # 清除所有缓存 await rag.aclear_cache() # 同步版本 rag.clear_cache() ``` 如果需要按类型管理查询相关缓存,可以使用 `lightrag.tools.clean_llm_query_cache` 工具,并参考说明文档 [lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md](./lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md)。该工具可管理 `mix`、`hybrid`、`local` 和 `global` 模式下的查询缓存与关键词缓存;它不会清理 `default:extract:*` 和 `default:summary:*` 这类提取缓存。
## 故障排除 ### 常见初始化错误 如果您在使用 LightRAG 时遇到以下错误: 1. **`AttributeError: __aenter__`** - **原因**:存储后端未初始化 - **解决方案**:在创建 LightRAG 实例后调用 `await rag.initialize_storages()` 2. **`KeyError: 'history_messages'`** - **原因**:流水线状态未初始化 - **解决方案**:在创建 LightRAG 实例后调用 `await rag.initialize_storages()` 3. **两个错误相继出现** - **原因**:两个初始化方法都未被调用 - **解决方案**:始终遵循以下模式: ```python rag = LightRAG(...) await rag.initialize_storages() ``` ### 模型切换问题 在不同的嵌入模型(embedding models)之间切换时,您必须清空数据目录以避免错误。如果您希望保留 LLM 缓存,唯一可以保留的文件是 `kv_store_llm_response_cache.json`。 ## LightRAG API LightRAG 服务器旨在提供 Web UI 和 API 支持。**有关 LightRAG 服务器的更多信息,请参考 [LightRAG Server](./lightrag/api/README.md)。** ## 图谱可视化 LightRAG 服务器提供了全面的知识图谱可视化功能。它支持各种重力布局、节点查询、子图过滤等。**有关 LightRAG 服务器的更多信息,请参考 [LightRAG Server](./lightrag/api/README.md)。** ![iShot_2025-03-23_12.40.08](./README.assets/iShot_2025-03-23_12.40.08.png) ## Langfuse 可观测性集成 Langfuse 提供了一个可以直接替换 OpenAI 客户端的方案,自动跟踪所有 LLM 交互,使开发者能够在不更改代码的情况下监控、调试和优化其 RAG 系统。 ### 安装可观测性选项 ```bash pip install lightrag-hku pip install lightrag-hku[observability] # 或从源代码安装并启用调试模式 pip install -e . 
pip install -e ".[observability]" ``` ### 配置 Langfuse 环境变量 修改 .env 文件: ```bash ## Langfuse Observability (Optional) # LLM observability and tracing platform # Install with: pip install lightrag-hku[observability] # Sign up at: https://cloud.langfuse.com or self-host LANGFUSE_SECRET_KEY="" LANGFUSE_PUBLIC_KEY="" LANGFUSE_HOST="https://cloud.langfuse.com" # 或您的自托管实例 LANGFUSE_ENABLE_TRACE=true ``` ### Langfuse 用法 安装并配置完成后,Langfuse 会自动追踪所有 OpenAI LLM 调用。Langfuse 仪表板功能包括: - **追踪(Tracing)**:查看完整的 LLM 调用链 - **分析(Analytics)**:Token 使用情况、延迟、成本指标 - **调试(Debugging)**:检查提示词和响应 - **评估(Evaluation)**:比较模型输出 - **监控(Monitoring)**:实时告警 ### 重要通知 **注意**:LightRAG 目前仅将 OpenAI 兼容的 API 调用与 Langfuse 集成。Ollama、Azure 和 AWS Bedrock 等 API 尚不支持 Langfuse 可观测性。 ## 基于 RAGAS 的评估 **RAGAS** (Retrieval Augmented Generation Assessment) 是一个使用 LLM 对 RAG 系统进行无参考评估的框架。项目中包含一个基于 RAGAS 的评估脚本。有关详细信息,请参考 [基于 RAGAS 的评估框架](lightrag/evaluation/README_EVALUASTION_RAGAS.md)。 ## 评估 ### 数据集 LightRAG 中使用的数据集可以从 [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain) 下载。 ### 生成查询 LightRAG 使用以下提示(prompt)生成高层级查询,相应代码位于 `examples/generate_query.py`。
提示词 ```python Given the following description of a dataset: {description} Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset. Output the results in the following structure: - User 1: [user description] - Task 1: [task description] - Question 1: - Question 2: - Question 3: - Question 4: - Question 5: - Task 2: [task description] ... - Task 5: [task description] - User 2: [user description] ... - User 5: [user description] ... ```
### 批量评估 为了在处理高层级查询时评估两个 RAG 系统的性能,LightRAG 使用以下提示词,具体代码见 `reproduce/batch_eval.py`。
提示词 ```python ---Role--- You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. ---Goal--- You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question? - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question? - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic? For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories. Here is the question: {query} Here are the two answers: **Answer 1:** {answer1} **Answer 2:** {answer2} Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. Output your evaluation in the following JSON format: {{ "Comprehensiveness": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]" }}, "Empowerment": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]" }}, "Overall Winner": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" }} }} ```
### 总体性能表 ||**农业**||**计算机科学**||**法律**||**混合**|| |----------------------|---------------|------------|------|------------|---------|------------|-------|------------| ||NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**| |**全面性**|32.4%|**67.6%**|38.4%|**61.6%**|16.4%|**83.6%**|38.8%|**61.2%**| |**多样性**|23.6%|**76.4%**|38.0%|**62.0%**|13.6%|**86.4%**|32.4%|**67.6%**| |**赋能性**|32.4%|**67.6%**|38.8%|**61.2%**|16.4%|**83.6%**|42.8%|**57.2%**| |**总体**|32.4%|**67.6%**|38.8%|**61.2%**|15.2%|**84.8%**|40.0%|**60.0%**| ||RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**| |**全面性**|31.6%|**68.4%**|38.8%|**61.2%**|15.2%|**84.8%**|39.2%|**60.8%**| |**多样性**|29.2%|**70.8%**|39.2%|**60.8%**|11.6%|**88.4%**|30.8%|**69.2%**| |**赋能性**|31.6%|**68.4%**|36.4%|**63.6%**|15.2%|**84.8%**|42.4%|**57.6%**| |**总体**|32.4%|**67.6%**|38.0%|**62.0%**|14.4%|**85.6%**|40.0%|**60.0%**| ||HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**| |**全面性**|26.0%|**74.0%**|41.6%|**58.4%**|26.8%|**73.2%**|40.4%|**59.6%**| |**多样性**|24.0%|**76.0%**|38.8%|**61.2%**|20.0%|**80.0%**|32.4%|**67.6%**| |**赋能性**|25.2%|**74.8%**|40.8%|**59.2%**|26.0%|**74.0%**|46.0%|**54.0%**| |**总体**|24.8%|**75.2%**|41.6%|**58.4%**|26.4%|**73.6%**|42.4%|**57.6%**| ||GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**| |**全面性**|45.6%|**54.4%**|48.4%|**51.6%**|48.4%|**51.6%**|**50.4%**|49.6%| |**多样性**|22.8%|**77.2%**|40.8%|**59.2%**|26.4%|**73.6%**|36.0%|**64.0%**| |**赋能性**|41.2%|**58.8%**|45.2%|**54.8%**|43.6%|**56.4%**|**50.8%**|49.2%| |**总体**|45.2%|**54.8%**|48.0%|**52.0%**|47.2%|**52.8%**|**50.4%**|49.6%| ## 复现 所有代码均可在 `./reproduce` 目录中找到。 ### Step-0 提取唯一上下文 首先,我们需要提取数据集中的唯一上下文(unique contexts)。
代码 ```python def extract_unique_contexts(input_directory, output_directory): os.makedirs(output_directory, exist_ok=True) jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl')) print(f"Found {len(jsonl_files)} JSONL files.") for file_path in jsonl_files: filename = os.path.basename(file_path) name, ext = os.path.splitext(filename) output_filename = f"{name}_unique_contexts.json" output_path = os.path.join(output_directory, output_filename) unique_contexts_dict = {} print(f"Processing file: {filename}") try: with open(file_path, 'r', encoding='utf-8') as infile: for line_number, line in enumerate(infile, start=1): line = line.strip() if not line: continue try: json_obj = json.loads(line) context = json_obj.get('context') if context and context not in unique_contexts_dict: unique_contexts_dict[context] = None except json.JSONDecodeError as e: print(f"JSON decoding error in file {filename} at line {line_number}: {e}") except FileNotFoundError: print(f"File not found: {filename}") continue except Exception as e: print(f"An error occurred while processing file {filename}: {e}") continue unique_contexts_list = list(unique_contexts_dict.keys()) print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.") try: with open(output_path, 'w', encoding='utf-8') as outfile: json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4) print(f"Unique `context` entries have been saved to: {output_filename}") except Exception as e: print(f"An error occurred while saving to the file {output_filename}: {e}") print("All files have been processed.") ```
### Step-1 插入上下文 我们将提取出的上下文插入到 LightRAG 系统中。
代码 ```python def insert_text(rag, file_path): with open(file_path, mode='r') as f: unique_contexts = json.load(f) retries = 0 max_retries = 3 while retries < max_retries: try: rag.insert(unique_contexts) break except Exception as e: retries += 1 print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}") time.sleep(10) if retries == max_retries: print("Insertion failed after exceeding the maximum number of retries") ```
### Step-2 生成查询 我们从数据集每个上下文的前半部分和后半部分提取 token,然后将它们组合作为数据集描述来生成查询。
代码 ```python tokenizer = GPT2Tokenizer.from_pretrained('gpt2') def get_summary(context, tot_tokens=2000): tokens = tokenizer.tokenize(context) half_tokens = tot_tokens // 2 start_tokens = tokens[1000:1000 + half_tokens] end_tokens = tokens[-(1000 + half_tokens):1000] summary_tokens = start_tokens + end_tokens summary = tokenizer.convert_tokens_to_string(summary_tokens) return summary ```
### Step-3 查询 对于 Step-2 中生成的查询,我们将提取它们并对 LightRAG 进行查询。
代码 ```python def extract_queries(file_path): with open(file_path, 'r') as f: data = f.read() data = data.replace('**', '') queries = re.findall(r'- Question \d+: (.+)', data) return queries ```
## 🔗 相关项目 *生态与扩展*
📸
RAG-Anything
多模态 RAG
🎥
VideoRAG
极端长上下文视频 RAG
MiniRAG
极简 RAG
--- ## ⭐ Star 历史 Star History Chart ## 🤝 贡献
我们感谢所有贡献者做出的宝贵贡献。
--- ## 📖 引用 ```python @article{guo2024lightrag, title={LightRAG: Simple and Fast Retrieval-Augmented Generation}, author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang}, year={2024}, eprint={2410.05779}, archivePrefix={arXiv}, primaryClass={cs.IR} } ``` ---
感谢您访问 LightRAG!
================================================ FILE: README.md ================================================
LightRAG Logo
# 🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
HKUDS%2FLightRAG | Trendshift

LightRAG Diagram
---
LiteWrite
--- ## 🎉 News - [2025.11]🎯[New Feature]: Integrated **RAGAS for Evaluation** and **Langfuse for Tracing**. Updated the API to return retrieved contexts alongside query results to support context precision metrics. - [2025.10]🎯[Scalability Enhancement]: Eliminated processing bottlenecks to support **Large-Scale Datasets Efficiently**. - [2025.09]🎯[New Feature] Enhances knowledge graph extraction accuracy for **Open-Sourced LLMs** such as Qwen3-30B-A3B. - [2025.08]🎯[New Feature] **Reranker** is now supported, significantly boosting performance for mixed queries (set as default query mode). - [2025.08]🎯[New Feature] Added **Document Deletion** with automatic KG regeneration to ensure optimal query performance. - [2025.06]🎯[New Release] Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything) — an **All-in-One Multimodal RAG** system for seamless processing of text, images, tables, and equations. - [2025.06]🎯[New Feature] LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details. - [2025.03]🎯[New Feature] LightRAG now supports citation functionality, enabling proper source attribution and enhanced document traceability. - [2025.02]🎯[New Feature] You can now use MongoDB as an all-in-one storage solution for unified data management. - [2025.02]🎯[New Release] Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG)-a RAG system for understanding extremely long-context videos - [2025.01]🎯[New Release] Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models. 
- [2025.01]🎯You can now use PostgreSQL as an all-in-one storage solution for data management. - [2024.11]🎯[New Resource] A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag) — explore in-depth tutorials and best practices. Many thanks to the blog author for this excellent contribution! - [2024.11]🎯[New Feature] Introducing the LightRAG WebUI — an interface that allows you to insert, query, and visualize LightRAG knowledge through an intuitive web-based dashboard. - [2024.11]🎯[New Feature] You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) — enabling graph database support. - [2024.10]🎯[New Feature] We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE) — a walkthrough of LightRAG's capabilities. Thanks to the author for this excellent contribution! - [2024.10]🎯[New Channel] We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)!💬 Welcome to join our community for sharing, discussions, and collaboration! 🎉🎉 - [2024.10]🎯[New Feature] LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
Algorithm Flowchart ![LightRAG Indexing Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg) *Figure 1: LightRAG Indexing Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)* ![LightRAG Retrieval and Querying Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg) *Figure 2: LightRAG Retrieval and Querying Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)*
## Installation > **💡 Using uv for Package Management**: This project uses [uv](https://docs.astral.sh/uv/) for fast and reliable Python package management. > Install uv first: `curl -LsSf https://astral.sh/uv/install.sh | sh` (Unix/macOS) or `powershell -c "irm https://astral.sh/uv/install.ps1 | iex"` (Windows) > > **Note**: You can also use pip if you prefer, but uv is recommended for better performance and more reliable dependency management. > > **📦 Offline Deployment**: For offline or air-gapped environments, see the [Offline Deployment Guide](./docs/OfflineDeployment.md) for instructions on pre-installing all dependencies and cache files. ### Install LightRAG Server The LightRAG Server is designed to provide Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provide an Ollama compatible interfaces, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bot, such as Open WebUI, to access LightRAG easily. * Install from PyPI ```bash ### Install LightRAG Server as tool using uv (recommended) uv tool install "lightrag-hku[api]" ### Or using pip # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install "lightrag-hku[api]" ### Build front-end artifacts cd lightrag_webui bun install --frozen-lockfile bun run build cd .. # Setup env file # Obtain the env.example file by downloading it from the GitHub repository root # or by copying it from a local source checkout. 
cp env.example .env # Update the .env with your LLM and embedding configurations # Launch the server lightrag-server ``` * Installation from Source ```bash git clone https://github.com/HKUDS/LightRAG.git cd LightRAG # Using uv (recommended) # Note: uv sync automatically creates a virtual environment in .venv/ uv sync --extra api source .venv/bin/activate # Activate the virtual environment (Linux/macOS) # Or on Windows: .venv\Scripts\activate ### Or using pip with virtual environment # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install -e ".[api]" # Build front-end artifacts cd lightrag_webui bun install --frozen-lockfile bun run build cd .. # setup env file cp env.example .env # Update the .env with your LLM and embedding configurations # Launch API-WebUI server lightrag-server ``` * Launching the LightRAG Server with Docker Compose ```bash git clone https://github.com/HKUDS/LightRAG.git cd LightRAG cp env.example .env # Update the .env with your LLM and embedding configurations # modify LLM and Embedding settings in .env docker compose up ``` > Historical versions of LightRAG docker images can be found here: [LightRAG Docker Images]( https://github.com/HKUDS/LightRAG/pkgs/container/lightrag) ### Create .env File With Setup Tool Instead of editing `env.example` by hand, use the interactive setup wizard to generate a configured `.env` and, when needed, `docker-compose.final.yml`: ```bash make env-base # Required first step: LLM, embedding, reranker make env-storage # Optional: storage backends and database services make env-server # Optional: server port, auth, and SSL make env-base-rewrite # Optional: force-regenerate wizard-managed compose services make env-storage-rewrite # Optional: force-regenerate wizard-managed compose services make env-security-check # Optional: audit the current .env for security risks ``` For full description of every target see [docs/InteractiveSetup.md](./docs/InteractiveSetup.md). 
The setup wizards update configuration only; run `make env-security-check` separately to audit the current `.env` for security risks before deployment. By default, rerunning the setup preserves unchanged wizard-managed compose service blocks; use a `*-rewrite` target only when you need to rebuild those managed blocks from the bundled templates. ### Install LightRAG Core * Install from source (Recommended) ```bash cd LightRAG # Note: uv sync automatically creates a virtual environment in .venv/ uv sync source .venv/bin/activate # Activate the virtual environment (Linux/macOS) # Or on Windows: .venv\Scripts\activate # Or: pip install -e . ``` * Install from PyPI ```bash uv pip install lightrag-hku # Or: pip install lightrag-hku ``` ## Quick Start ### LLM and Technology Stack Requirements for LightRAG LightRAG's demands on the capabilities of Large Language Models (LLMs) are significantly higher than those of traditional RAG, as it requires the LLM to perform entity-relationship extraction tasks from documents. Configuring appropriate Embedding and Reranker models is also crucial for improving query performance. - **LLM Selection**: - It is recommended to use an LLM with at least 32 billion parameters. - The context length should be at least 32KB, with 64KB being recommended. - It is not recommended to choose reasoning models during the document indexing stage. - During the query stage, it is recommended to choose models with stronger capabilities than those used in the indexing stage to achieve better query results. - **Embedding Model**: - A high-performance Embedding model is essential for RAG. - We recommend using mainstream multilingual Embedding models, such as: `BAAI/bge-m3` and `text-embedding-3-large`. - **Important Note**: The Embedding model must be determined before document indexing, and the same model must be used during the document query phase. 
For certain storage solutions (e.g., PostgreSQL), the vector dimension must be defined upon initial table creation. Therefore, when changing embedding models, it is necessary to delete the existing vector-related tables and allow LightRAG to recreate them with the new dimensions. - **Reranker Model Configuration**: - Configuring a Reranker model can significantly enhance LightRAG's retrieval performance. - When a Reranker model is enabled, it is recommended to set the "mix mode" as the default query mode. - We recommend using mainstream Reranker models, such as: `BAAI/bge-reranker-v2-m3` or models provided by services like Jina. ### Quick Start for LightRAG Server * For more information about LightRAG Server, please refer to [LightRAG Server](./lightrag/api/README.md). ### Quick Start for LightRAG core To get started with LightRAG core, refer to the sample codes available in the `examples` folder. Additionally, a [video demo](https://www.youtube.com/watch?v=g21royNJ4fw) demonstration is provided to guide you through the local setup process. If you already possess an OpenAI API key, you can run the demo right away: ```bash ### you should run the demo code with project folder cd LightRAG ### provide your API-KEY for OpenAI export OPENAI_API_KEY="sk-...your_opeai_key..." ### download the demo document of "A Christmas Carol" by Charles Dickens curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt ### run the demo code python examples/lightrag_openai_demo.py ``` For a streaming response implementation example, please see `examples/lightrag_openai_compatible_demo.py`. Prior to execution, ensure you modify the sample code's LLM and embedding configurations accordingly. **Note 1**: When running the demo program, please be aware that different test scripts may use different embedding models. 
If you switch to a different embedding model, you must clear the data directory (`./dickens`); otherwise, the program may encounter errors. If you wish to retain the LLM cache, you can preserve the `kv_store_llm_response_cache.json` file while clearing the data directory. **Note 2**: Only `lightrag_openai_demo.py` and `lightrag_openai_compatible_demo.py` are officially supported sample codes. Other sample files are community contributions that haven't undergone full testing and optimization. ## Programming with LightRAG Core > ⚠️ **If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server**. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations. ### ⚠️ Important: Initialization Requirements **LightRAG requires explicit initialization before use.** You must call `await rag.initialize_storages()` after creating a LightRAG instance, otherwise you will encounter errors. ### A Simple Program Use the below Python snippet to initialize LightRAG, insert text to it, and perform queries: ```python import os import asyncio from lightrag import LightRAG, QueryParam from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed from lightrag.utils import setup_logger setup_logger("lightrag", level="INFO") WORKING_DIR = "./rag_storage" if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, embedding_func=openai_embed, llm_model_func=gpt_4o_mini_complete, ) # IMPORTANT: Both initialization calls are required! 
await rag.initialize_storages() # Initialize storage backends return rag async def main(): try: # Initialize RAG instance rag = await initialize_rag() await rag.ainsert("Your text") # Perform hybrid search mode = "hybrid" print( await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode=mode) ) ) except Exception as e: print(f"An error occurred: {e}") finally: if rag: await rag.finalize_storages() if __name__ == "__main__": asyncio.run(main()) ``` Important notes for the above snippet: - Export your OPENAI_API_KEY environment variable before running the script. - This program uses the default storage settings for LightRAG, so all data will be persisted to `WORKING_DIR` (`./rag_storage` in this example). - This program demonstrates only the simplest way to initialize a LightRAG object: injecting the embedding and LLM functions, and initializing the storages after creating the LightRAG object. ### LightRAG init parameters A full list of LightRAG init parameters:
Parameters | **Parameter** | **Type** | **Explanation** | **Default** | | -------------- | ---------- | ----------------- | ------------- | | **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` | | **workspace** | str | Workspace name for data isolation between different LightRAG Instances | | | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage`,`OpenSearchKVStorage` | `JsonKVStorage` | | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage`,`OpenSearchVectorDBStorage` | `NanoVectorDBStorage` | | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage`,`OpenSearchGraphStorage` | `NetworkXStorage` | | **doc_status_storage** | `str` | Storage type for documents process status. Supported types: `JsonDocStatusStorage`,`PGDocStatusStorage`,`MongoDocStatusStorage`,`OpenSearchDocStatusStorage` | `JsonDocStatusStorage` | | **chunk_token_size** | `int` | Maximum token size per chunk when splitting documents | `1200` | | **chunk_overlap_token_size** | `int` | Overlap token size between two chunks when splitting documents | `100` | | **tokenizer** | `Tokenizer` | The function used to convert text into tokens (numbers) and back using .encode() and .decode() functions following `TokenizerInterface` protocol. If you don't specify one, it will use the default Tiktoken tokenizer. | `TiktokenTokenizer` | | **tiktoken_model_name** | `str` | If you're using the default Tiktoken tokenizer, this is the name of the specific Tiktoken model to use. This setting is ignored if you provide your own tokenizer. 
| `gpt-4o-mini` | | **entity_extract_max_gleaning** | `int` | Number of loops in the entity extraction process, appending history messages | `1` | | **node_embedding_algorithm** | `str` | Algorithm for node embedding (currently not used) | `node2vec` | | **node2vec_params** | `dict` | Parameters for node embedding | `{"dimensions": 1536,"num_walks": 10,"walk_length": 40,"window_size": 2,"iterations": 3,"random_seed": 3,}` | | **embedding_func** | `EmbeddingFunc` | Function to generate embedding vectors from text | `openai_embed` | | **embedding_batch_num** | `int` | Maximum batch size for embedding processes (multiple texts sent per batch) | `32` | | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` | | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` | | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` | | **summary_context_size** | `int` | Maximum tokens send to LLM to generate summaries for entity relation merging | `10000`(configured by env var SUMMARY_CONTEXT_SIZE) | | **summary_max_tokens** | `int` | Maximum token size for entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) | | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) | | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | | | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) | | **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` | | **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug 
your application | `TRUE` | | **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets example limit, entity/relation extraction output language | `language: English` | | **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
### Query Param Use QueryParam to control the behavior your query: ```python class QueryParam: """Configuration parameters for query execution in LightRAG.""" mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global" """Specifies the retrieval mode: - "local": Focuses on context-dependent information. - "global": Utilizes global knowledge. - "hybrid": Combines local and global retrieval methods. - "naive": Performs a basic search without advanced techniques. - "mix": Integrates knowledge graph and vector retrieval. """ only_need_context: bool = False """If True, only returns the retrieved context without generating a response.""" only_need_prompt: bool = False """If True, only returns the generated prompt without producing a response.""" response_type: str = "Multiple Paragraphs" """Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'.""" stream: bool = False """If True, enables streaming output for real-time responses.""" top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "20")) """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value. 
""" max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "6000")) """Maximum number of tokens allocated for entity context in unified token control system.""" max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "8000")) """Maximum number of tokens allocated for relationship context in unified token control system.""" max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000")) """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" # History messages are only sent to LLM for context, not used for retrieval conversation_history: list[dict[str, str]] = field(default_factory=list) """Stores past conversation history to maintain context. Format: [{"role": "user/assistant", "content": "message"}]. """ # Deprecated (ids filter lead to potential hallucination effects) ids: list[str] | None = None """List of ids to filter the results.""" model_func: Callable[..., object] | None = None """Optional override for the LLM model function to use for this specific query. If provided, this will be used instead of the global model function. This allows using different models for different query modes. """ user_prompt: str | None = None """User-provided prompt for the query. Addition instructions for LLM. If provided, this will be inject into the prompt template. It's purpose is the let user customize the way LLM generate the response. """ enable_rerank: bool = True """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True to enable reranking when rerank model is available. """ ``` > default value of Top_k can be change by environment variables TOP_K. ### LLM and Embedding Injection LightRAG requires the utilization of LLM and Embedding models to accomplish document indexing and querying tasks. During the initialization phase, it is necessary to inject the invocation methods of the relevant models into LightRAG:
Using Open AI-like APIs * LightRAG also supports Open AI-like chat/embeddings APIs: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.openai import openai_complete_if_cache, openai_embed async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await openai_complete_if_cache( "solar-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("UPSTAGE_API_KEY"), base_url="https://api.upstage.ai/v1/solar", **kwargs ) @wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192, model_name="solar-embedding-1-large-query") async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embed.func( texts, model="solar-embedding-1-large-query", api_key=os.getenv("UPSTAGE_API_KEY"), base_url="https://api.upstage.ai/v1/solar" ) async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=embedding_func # Pass the decorated function directly ) await rag.initialize_storages() return rag ``` > **Important Note on Embedding Function Wrapping:** > > `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
Using Hugging Face Models * If you want to use Hugging Face models, you only need to set LightRAG as follows: See `lightrag_hf_demo.py` ```python from functools import partial from transformers import AutoTokenizer, AutoModel # Pre-load tokenizer and model tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") # Initialize LightRAG with Hugging Face model rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=hf_model_complete, # Use Hugging Face model for text generation llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face # Use Hugging Face embedding function embedding_func=EmbeddingFunc( embedding_dim=384, max_token_size=2048, model_name="sentence-transformers/all-MiniLM-L6-v2", func=partial( hf_embed.func, # Use .func to access the unwrapped function tokenizer=tokenizer, embed_model=embed_model ) ), ) ```
Using Ollama Models **Overview** If you want to use Ollama models, you need to pull model you plan to use and embedding model, for example `nomic-embed-text`. Then you only need to set LightRAG as follows: ```python import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed @wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") # Initialize LightRAG with Ollama model rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=ollama_model_complete, # Use Ollama model for text generation llm_model_name='your_model_name', # Your model name embedding_func=embedding_func, # Pass the decorated function directly ) ``` * **Increasing context size** In order for LightRAG to work context should be at least 32k tokens. By default Ollama models have context size of 8k. You can achieve this using one of two ways: * **Increasing the `num_ctx` parameter in Modelfile** 1. Pull the model: ```bash ollama pull qwen2 ``` 2. Display the model file: ```bash ollama show --modelfile qwen2 > Modelfile ``` 3. Edit the Modelfile by adding the following line: ```bash PARAMETER num_ctx 32768 ``` 4. 
Create the modified model: ```bash ollama create -f Modelfile qwen2m ``` * **Setup `num_ctx` via Ollama API** You can use the `llm_model_kwargs` param to configure Ollama: ```python import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed @wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=ollama_model_complete, # Use Ollama model for text generation llm_model_name='your_model_name', # Your model name llm_model_kwargs={"options": {"num_ctx": 32768}}, embedding_func=embedding_func, # Pass the decorated function directly ) ``` > **Important Note on Embedding Function Wrapping:** > > `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions. * **Low RAM GPUs** In order to run this experiment on a low-RAM GPU, you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
LlamaIndex LightRAG supports integration with LlamaIndex (`llm/llama_index_impl.py`): - Integrates with OpenAI and other providers through LlamaIndex - See [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/) for detailed setup or the [examples](examples/unofficial-sample/) **Example Usage** ```python # Using LlamaIndex with direct OpenAI access import asyncio from lightrag import LightRAG from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openai import OpenAI from lightrag.utils import setup_logger # Setup log handler for LightRAG setup_logger("lightrag", level="INFO") async def initialize_rag(): rag = LightRAG( working_dir="your/path", llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function embedding_dim=1536, max_token_size=2048, model_name=embed_model, func=partial(llama_index_embed.func, embed_model=embed_model) # Use .func to access the unwrapped function ), ) await rag.initialize_storages() return rag def main(): # Initialize RAG instance rag = asyncio.run(initialize_rag()) with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) # Perform naive search print( rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) ) # Perform local search print( rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) ) # Perform global search print( rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) ) # Perform hybrid search print( rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) ) if __name__ == "__main__": main() ``` **For detailed documentation and examples, see:** - [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/) - [Direct OpenAI 
Example](examples/unofficial-sample/lightrag_llamaindex_direct_demo.py) - [LiteLLM Proxy Example](examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py) - [LiteLLM Proxy with Opik Example](examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py)
Using Azure OpenAI Models If you want to use Azure OpenAI models, you only need to set up LightRAG as follows: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.azure_openai import azure_openai_complete_if_cache, azure_openai_embed # Configure the generation model async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await azure_openai_complete_if_cache( prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("AZURE_OPENAI_API_KEY"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_version=os.getenv("AZURE_OPENAI_API_VERSION"), deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"), **kwargs ) # Configure the embedding model @wrap_embedding_func_with_attrs( embedding_dim=1536, max_token_size=8192, model_name=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL") ) async def embedding_func(texts: list[str]) -> np.ndarray: return await azure_openai_embed.func( texts, api_key=os.getenv("AZURE_OPENAI_API_KEY"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_version=os.getenv("AZURE_OPENAI_API_VERSION"), deployment_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME") ) rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=embedding_func ) ```
Using Google Gemini Models If you want to use Google Gemini models, you only need to set up LightRAG as follows: ```python import os import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.gemini import gemini_model_complete, gemini_embed # Configure the generation model async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await gemini_model_complete( prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=os.getenv("GEMINI_API_KEY"), model_name="gemini-2.0-flash", **kwargs ) # Configure the embedding model @wrap_embedding_func_with_attrs( embedding_dim=768, max_token_size=2048, model_name="models/text-embedding-004" ) async def embedding_func(texts: list[str]) -> np.ndarray: return await gemini_embed.func( texts, api_key=os.getenv("GEMINI_API_KEY"), model="models/text-embedding-004" ) rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, llm_model_name="gemini-2.0-flash", embedding_func=embedding_func ) ```
### Rerank Function Injection To enhance retrieval quality, documents can be re-ranked based on a more effective relevance scoring model. The `rerank.py` file provides three Reranker provider driver functions: * **Cohere / vLLM**: `cohere_rerank` * **Jina AI**: `jina_rerank` * **Aliyun**: `ali_rerank` You can inject one of these functions into the `rerank_model_func` attribute of the LightRAG object. This will enable LightRAG's query function to re-order retrieved text blocks using the injected function. For detailed usage, please refer to the `examples/rerank_example.py` file. ### User Prompt vs. Query When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly impacts query effectiveness. The `user_prompt` parameter in Query Param is specifically designed to address this issue — it does not participate in the RAG retrieval phase, but rather guides the LLM on how to process the retrieved results after the query is completed. Here's how to use it: ```python # Create query parameters query_param = QueryParam( mode = "hybrid", # Other modes:local, global, hybrid, mix, naive user_prompt = "For diagrams, use mermaid format with English/Pinyin node names and Chinese display labels", ) # Query and process response_default = rag.query( "Please draw a character relationship diagram for Scrooge", param=query_param ) print(response_default) ``` ### Insert
Basic Insert ```python # Basic Insert rag.insert("Text") ```
Batch Insert ```python # Basic Batch Insert: Insert multiple texts at once rag.insert(["TEXT1", "TEXT2",...]) # Batch Insert with custom batch size configuration rag = LightRAG( ... working_dir=WORKING_DIR, max_parallel_insert = 4 ) rag.insert(["TEXT1", "TEXT2", "TEXT3", ...]) # Documents will be processed in batches of 4 ``` The `max_parallel_insert` parameter determines the number of documents processed concurrently in the document indexing pipeline. If unspecified, the default value is **2**. We recommend keeping this setting **below 10**, as the performance bottleneck typically lies with the LLM (Large Language Model) processing.
Insert with ID If you want to provide your own IDs for your documents, the number of documents and the number of IDs must be the same. ```python # Insert single text, and provide ID for it rag.insert("TEXT1", ids=["ID_FOR_TEXT1"]) # Insert multiple texts, and provide IDs for them rag.insert(["TEXT1", "TEXT2",...], ids=["ID_FOR_TEXT1", "ID_FOR_TEXT2"]) ```
Insert using Pipeline The `apipeline_enqueue_documents` and `apipeline_process_enqueue_documents` functions allow you to perform incremental insertion of documents into the graph. This is useful for scenarios where you want to process documents in the background while still allowing the main thread to continue executing. ```python rag = LightRAG(..) await rag.apipeline_enqueue_documents(input) # Your routine in loop await rag.apipeline_process_enqueue_documents(input) ```
Insert Multi-file Type Support The `textract` library supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF. ```python import textract file_path = 'TEXT.pdf' text_content = textract.process(file_path) rag.insert(text_content.decode('utf-8')) ```
Citation Functionality By providing file paths, the system ensures that sources can be traced back to their original documents. ```python # Define documents and their file paths documents = ["Document content 1", "Document content 2"] file_paths = ["path/to/doc1.txt", "path/to/doc2.txt"] # Insert documents with file paths rag.insert(documents, file_paths=file_paths) ```
### Storage LightRAG uses 4 types of storage for different purposes: * KV_STORAGE: llm response cache, text chunks, document information * VECTOR_STORAGE: entities vectors, relation vectors, chunks vectors * GRAPH_STORAGE: entity relation graph * DOC_STATUS_STORAGE: document indexing status Each storage type has several implementations: * KV_STORAGE supported implementations: ``` JsonKVStorage JsonFile (default) PGKVStorage Postgres RedisKVStorage Redis MongoKVStorage MongoDB OpenSearchKVStorage OpenSearch ``` * GRAPH_STORAGE supported implementations: ``` NetworkXStorage NetworkX (default) Neo4JStorage Neo4J PGGraphStorage PostgreSQL with AGE plugin MemgraphStorage Memgraph OpenSearchGraphStorage OpenSearch ``` > Testing has shown that Neo4J delivers superior performance in production environments compared to PostgreSQL with AGE plugin. * VECTOR_STORAGE supported implementations: ``` NanoVectorDBStorage NanoVector (default) PGVectorStorage Postgres MilvusVectorDBStorage Milvus FaissVectorDBStorage Faiss QdrantVectorDBStorage Qdrant MongoVectorDBStorage MongoDB OpenSearchVectorDBStorage OpenSearch ``` * DOC_STATUS_STORAGE: supported implementations: ``` JsonDocStatusStorage JsonFile (default) PGDocStatusStorage Postgres MongoDocStatusStorage MongoDB OpenSearchDocStatusStorage OpenSearch ``` Example connection configurations for each storage type can be found in the repository's `env.example` file. The database instance in the connection string needs to be created by you on the database server beforehand. LightRAG is only responsible for creating tables within the database instance, not for creating the database instance itself. If using Redis as storage, remember to configure automatic data persistence rules for Redis, otherwise data will be lost after the Redis service restarts. If using PostgreSQL, it is recommended to use version 16.6 or above.
Using Neo4J Storage * For production-level scenarios you will most likely want to leverage an enterprise solution for KG storage. * Running Neo4J in Docker is recommended for seamless local testing. * See: https://hub.docker.com/_/neo4j ```python export NEO4J_URI="neo4j://localhost:7687" export NEO4J_USERNAME="neo4j" export NEO4J_PASSWORD="password" export NEO4J_DATABASE="neo4j" #<----------- If you are using community edition neo4j docker image. # Setup logger for LightRAG setup_logger("lightrag", level="INFO") # When you launch the project be sure to override the default KG by specifying graph_storage="Neo4JStorage". # Initialize LightRAG with Neo4J implementation. async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model graph_storage="Neo4JStorage", #<-----------override KG default ) # Initialize database connections await rag.initialize_storages() # Initialize pipeline status for document processing return rag ``` See test_neo4j.py for a working example.
Using PostgreSQL Storage For production-level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE). PostgreSQL version 16.6 or higher is supported. * PostgreSQL is lightweight: the whole binary distribution, including all necessary plugins, can be zipped to 40MB. Refer to the [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) for Windows; installation on Linux/Mac is straightforward. * If you prefer Docker and are a beginner, please start with this image to avoid hiccups (default user/password: rag/rag): https://hub.docker.com/r/gzdaniel/postgres-for-rag * How to start? Refer to: [examples/lightrag_gemini_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_gemini_postgres_demo.py) * For high-performance graph database requirements, Neo4j is recommended as Apache AGE's performance is not as competitive.
Using Faiss Storage Before using Faiss vector database, you must manually install `faiss-cpu` or `faiss-gpu`. - Install the required dependencies: ``` pip install faiss-cpu ``` You can also install `faiss-gpu` if you have GPU support. - Here we are using `sentence-transformers` but you can also use `OpenAIEmbedding` model with `3072` dimensions. ```python async def embedding_func(texts: list[str]) -> np.ndarray: model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(texts, convert_to_numpy=True) return embeddings # Initialize LightRAG with the LLM model function and embedding function rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( embedding_dim=384, max_token_size=2048, model_name="all-MiniLM-L6-v2", func=embedding_func, ), vector_storage="FaissVectorDBStorage", vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": 0.3 # Your desired threshold } ) ```
Using Memgraph for Storage * Memgraph is a high-performance, in-memory graph database compatible with the Neo4j Bolt protocol. * You can run Memgraph locally using Docker for easy testing: * See: https://memgraph.com/download ```python export MEMGRAPH_URI="bolt://localhost:7687" # Setup logger for LightRAG setup_logger("lightrag", level="INFO") # When you launch the project, override the default KG: NetworkX # by specifying graph_storage="MemgraphStorage". # Note: Default settings use NetworkX # Initialize LightRAG with Memgraph implementation. async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model graph_storage="MemgraphStorage", #<-----------override KG default ) # Initialize database connections await rag.initialize_storages() # Initialize pipeline status for document processing return rag ```
Using Milvus for Vector Storage Milvus is a high-performance, scalable vector database for production-level vector storage. LightRAG provides three ways to configure Milvus, plus support for configurable index types to optimize performance and memory usage. ### Supported Index Types - `AUTOINDEX` (default): Milvus automatically selects the best index - `HNSW`: Hierarchical Navigable Small World graph for high recall - `HNSW_SQ`: HNSW with scalar quantization for memory savings (requires Milvus 2.6.8+) - `HNSW_PQ`, `HNSW_PRQ`: HNSW with product / product-residual quantization - `IVF_FLAT`, `IVF_SQ8`, `IVF_PQ`: Inverted-file family indexes - `DISKANN`: Disk-based approximate nearest neighbor - `SCANN`: Scalable nearest neighbor ### Supported Metric Types `COSINE` (default), `L2`, `IP` --- ### Config Approach 1 — Environment Variables (`.env` file) Best for: **LightRAG Server deployments and Docker/k8s setups**. ```bash # Connection MILVUS_URI=http://localhost:19530 MILVUS_DB_NAME=lightrag # MILVUS_USER=root # MILVUS_PASSWORD=your_password # MILVUS_TOKEN=your_token # Storage selection LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage # Index configuration (all optional — sensible defaults apply) MILVUS_INDEX_TYPE=HNSW # Default: AUTOINDEX MILVUS_METRIC_TYPE=COSINE # Default: COSINE MILVUS_HNSW_M=16 # Default: 16, range [2-2048] MILVUS_HNSW_EF_CONSTRUCTION=360 # Default: 360 MILVUS_HNSW_EF=200 # Default: 200 # HNSW_SQ options (requires Milvus 2.6.8+) # MILVUS_INDEX_TYPE=HNSW_SQ # MILVUS_HNSW_SQ_TYPE=SQ8 # SQ4U, SQ6, SQ8, BF16, FP16 # MILVUS_HNSW_SQ_REFINE=false # Enable refinement # MILVUS_HNSW_SQ_REFINE_TYPE=FP32 # Refinement precision # MILVUS_HNSW_SQ_REFINE_K=10 # Refinement expansion factor # IVF options # MILVUS_IVF_NLIST=1024 # MILVUS_IVF_NPROBE=16 ``` Then in Python code: ```python from lightrag import LightRAG async def initialize_rag(): rag = LightRAG( working_dir="./rag_storage", llm_model_func=..., embedding_func=..., vector_storage="MilvusVectorDBStorage", ) 
await rag.initialize_storages() return rag ``` ### Config Approach 2 — `vector_db_storage_cls_kwargs` (Python SDK) Best for: **Python SDK / framework integration** where you want all config in code. ```python from lightrag import LightRAG async def initialize_rag(): rag = LightRAG( working_dir="./rag_storage", llm_model_func=..., embedding_func=..., vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs={ "milvus_uri": "http://localhost:19530", "milvus_db_name": "lightrag", "index_type": "HNSW", "metric_type": "COSINE", "hnsw_m": 16, "hnsw_ef_construction": 360, "hnsw_ef": 200, "cosine_better_than_threshold": 0.2, }, ) await rag.initialize_storages() return rag ``` ### Config Approach 3 — `config.ini` (legacy) Connection parameters only; index settings use env vars or kwargs. ```ini [milvus] uri = http://localhost:19530 db_name = lightrag # user = root # password = your_password # token = your_token ``` ### Configuration Priority | Setting | 1st (highest) | 2nd | 3rd (lowest) | |---|---|---|---| | Connection (`uri`, …) | `vector_db_storage_cls_kwargs` | Environment variables | `config.ini` | | Index (`index_type`, …) | `vector_db_storage_cls_kwargs` | Environment variables | defaults | ### HNSW_SQ Compression Trade-offs | SQ Type | Compression | Precision | Notes | |---|---|---|---| | `SQ4U` | ~8× | Lower | Best memory savings | | `SQ6` | ~5.3× | Balanced | Good trade-off | | `SQ8` | ~4× | Good | **Recommended** | | `BF16` / `FP16` | ~2× | High | Near-lossless | **Version Requirements:** - HNSW_SQ index type requires **Milvus 2.6.8 or higher** - LightRAG will automatically validate the server version and raise an error if requirements are not met - Other index types work with Milvus 2.0+ **Backward Compatibility:** - If no index configuration is provided, LightRAG uses AUTOINDEX (Milvus default behavior) - Existing collections are not affected; index configuration only applies to newly created collections For complete configuration options, see 
`env.example` and `docs/MilvusConfigurationGuide.md`.
Using MongoDB Storage MongoDB provides a one-stop storage solution for LightRAG. MongoDB offers native KV storage and vector storage. LightRAG uses MongoDB collections to implement a simple graph storage. `MongoVectorDBStorage` requires a MongoDB deployment with Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local. The setup wizard's bundled local Docker MongoDB service is MongoDB Community Edition, so it can be used for KV/graph/doc-status storage but not for `MongoVectorDBStorage`.
Using Redis Storage LightRAG supports using Redis as KV storage. When using Redis storage, attention should be paid to persistence configuration and memory usage configuration. The following is the recommended Redis configuration: ``` save 900 1 save 300 10 save 60 1000 stop-writes-on-bgsave-error yes maxmemory 4gb maxmemory-policy noeviction maxclients 500 ``` When the interactive setup manages a local Redis container, it stages a user-editable config at `./data/config/redis.conf` and mounts it into the container. Setup preserves that file on reruns so local Redis tuning can be adjusted without losing manual edits.
Using OpenSearch Storage OpenSearch provides a unified storage solution for all four LightRAG storage types (KV, Vector, Graph, DocStatus). It offers native k-NN vector search, full-text search, and horizontal scalability — all without cloud-only restrictions. * **Requirements**: OpenSearch 3.x or higher with k-NN plugin enabled. Install with Docker (without plugins): ```bash docker run -d -p 9200:9200 -e "discovery.type=single-node" \ -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=YOUR_ADMIN_PASSWORD" \ opensearchproject/opensearch:latest ``` Install with Docker Compose (Recommended, with plugins): ```bash curl -O https://raw.githubusercontent.com/opensearch-project/opensearch-build/main/docker/release/dockercomposefiles/docker-compose-3.x.yml # Launch OpenSearch cluster OPENSEARCH_INITIAL_ADMIN_PASSWORD=YOUR_ADMIN_PASSWORD docker-compose -f docker-compose-3.x.yml up -d ``` * **Configuration**: Set environment variables (see `env.example` for full list): ```bash export OPENSEARCH_HOSTS=localhost:9200 export OPENSEARCH_USER=admin export OPENSEARCH_PASSWORD=YOUR_ADMIN_PASSWORD export OPENSEARCH_USE_SSL=true export OPENSEARCH_VERIFY_CERTS=false ``` * **Usage**: ```python rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=your_llm_func, embedding_func=your_embed_func, kv_storage="OpenSearchKVStorage", doc_status_storage="OpenSearchDocStatusStorage", graph_storage="OpenSearchGraphStorage", vector_storage="OpenSearchVectorDBStorage", ) ``` * **Graph Traversal**: When the OpenSearch SQL plugin with PPL support is available, graph queries use server-side BFS via the `graphlookup` command for optimal performance. Otherwise, it falls back to client-side batched BFS. This is auto-detected at startup, or can be forced via `OPENSEARCH_USE_PPL_GRAPHLOOKUP=true|false`. * **Integration Testing**: To run integration tests against a live OpenSearch cluster: 1. 
Start OpenSearch using Docker Compose (download [`docker-compose-3.x.yml`](https://raw.githubusercontent.com/opensearch-project/opensearch-build/main/docker/release/dockercomposefiles/docker-compose-3.x.yml)): ```bash OPENSEARCH_INITIAL_ADMIN_PASSWORD=YOUR_ADMIN_PASSWORD docker-compose -f docker-compose-3.x.yml up -d ``` 2. Verify the cluster is running: ```bash curl -sk -u admin:YOUR_ADMIN_PASSWORD https://localhost:9200 curl -sk -u admin:YOUR_ADMIN_PASSWORD https://localhost:9200/_cat/plugins?v ``` 3. Run the unit tests (no OpenSearch required — uses mocks): ```bash python -m pytest tests/test_opensearch_storage.py -v ``` 4. Run the OpenSearch storage demo against the live cluster: ```bash export OPENSEARCH_HOSTS=localhost:9200 export OPENSEARCH_USER=admin export OPENSEARCH_PASSWORD=YOUR_ADMIN_PASSWORD export OPENSEARCH_USE_SSL=true export OPENSEARCH_VERIFY_CERTS=false python examples/opensearch_storage_demo.py ``` 5. Run the full OpenAI + OpenSearch demo (requires `OPENAI_API_KEY`): ```bash export OPENAI_API_KEY=your-api-key python examples/lightrag_openai_opensearch_graph_demo.py ``` 6. Visualize the knowledge graph via LightRAG WebUI or standalone HTML: Requires [building front-end artifacts](https://github.com/HKUDS/LightRAG/blob/main/lightrag/api/README.md) before starting LightRAG Server. ```bash # Starting lightrag-server with OpenSearch Storage LIGHTRAG_KV_STORAGE=OpenSearchKVStorage \ LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage \ LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage \ LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage \ LLM_BINDING=openai \ EMBEDDING_BINDING=openai \ EMBEDDING_MODEL=text-embedding-3-large \ EMBEDDING_DIM=3072 \ OPENAI_API_KEY=your-api-key \ lightrag-server # Display the knowledge graph via LightRAG WebUI python examples/graph_visual_with_opensearch.py # Open http://localhost:9621/webui/ -> Knowledge Graph # Or generate standalone HTML file python examples/graph_visual_with_opensearch.py --html ```
### Data Isolation Between LightRAG Instances The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage: - **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`. - **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`. - **For Qdrant vector database, data isolation is achieved through payload-based partitioning (Qdrant's recommended multitenancy approach):** `QdrantVectorDBStorage` uses shared collections with payload filtering for unlimited workspace scalability. - **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`. - **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage` - **For OpenSearch, data isolation is achieved through index name prefixes:** `OpenSearchKVStorage`, `OpenSearchDocStatusStorage`, `OpenSearchGraphStorage`, `OpenSearchVectorDBStorage` To maintain compatibility with legacy data, when no workspace is configured the default workspace is `default` for PostgreSQL non-graph storage, null for PostgreSQL AGE graph storage, and `base` for Neo4j graph storage. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. 
These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `OPENSEARCH_WORKSPACE`. **Usage Example:** For a practical demonstration of managing multiple isolated knowledge bases (e.g., separating "Book" content from "HR Policies") within a single application, refer to the [Workspace Demo](examples/lightrag_gemini_workspace_demo.py). ### AGENTS.md -- Guiding Coding Agents AGENTS.md is a simple, open format for guiding coding agents (https://agents.md/). It is a dedicated, predictable place to provide the context and instructions to help AI coding agents work on LightRAG project. Different AI coders should not maintain separate guidance files individually. If any AI coder cannot automatically recognize AGENTS.md, symbolic links can be used as a solution. After establishing symbolic links, you can prevent them from being committed to the Git repository by configuring your local `.gitignore_global`. ## Edit Entities and Relations LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
Create Entities and Relations ```python # Create new entity entity = rag.create_entity("Google", { "description": "Google is a multinational technology company specializing in internet-related services and products.", "entity_type": "company" }) # Create another entity product = rag.create_entity("Gmail", { "description": "Gmail is an email service developed by Google.", "entity_type": "product" }) # Create relation between entities relation = rag.create_relation("Google", "Gmail", { "description": "Google develops and operates Gmail.", "keywords": "develops operates service", "weight": 2.0 }) ```
Edit Entities and Relations ```python # Edit an existing entity updated_entity = rag.edit_entity("Google", { "description": "Google is a subsidiary of Alphabet Inc., founded in 1998.", "entity_type": "tech_company" }) # Rename an entity (with all its relationships properly migrated) renamed_entity = rag.edit_entity("Gmail", { "entity_name": "Google Mail", "description": "Google Mail (formerly Gmail) is an email service." }) # Edit a relation between entities updated_relation = rag.edit_relation("Google", "Google Mail", { "description": "Google created and maintains Google Mail service.", "keywords": "creates maintains email service", "weight": 3.0 }) ``` All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
Insert Custom KG ```python custom_kg = { "chunks": [ { "content": "Alice and Bob are collaborating on quantum computing research.", "source_id": "doc-1", "file_path": "test_file", } ], "entities": [ { "entity_name": "Alice", "entity_type": "person", "description": "Alice is a researcher specializing in quantum physics.", "source_id": "doc-1", "file_path": "test_file" }, { "entity_name": "Bob", "entity_type": "person", "description": "Bob is a mathematician.", "source_id": "doc-1", "file_path": "test_file" }, { "entity_name": "Quantum Computing", "entity_type": "technology", "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", "source_id": "doc-1", "file_path": "test_file" } ], "relationships": [ { "src_id": "Alice", "tgt_id": "Bob", "description": "Alice and Bob are research partners.", "keywords": "collaboration research", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" }, { "src_id": "Alice", "tgt_id": "Quantum Computing", "description": "Alice conducts research on quantum computing.", "keywords": "research expertise", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" }, { "src_id": "Bob", "tgt_id": "Quantum Computing", "description": "Bob researches quantum computing.", "keywords": "research application", "weight": 1.0, "source_id": "doc-1", "file_path": "test_file" } ] } rag.insert_custom_kg(custom_kg) ```
Other Entity and Relation Operations - **create_entity**: Creates a new entity with specified attributes - **edit_entity**: Updates an existing entity's attributes or renames it - **create_relation**: Creates a new relation between existing entities - **edit_relation**: Updates an existing relation's attributes These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
## Delete Functions LightRAG provides comprehensive deletion capabilities, allowing you to delete documents, entities, and relationships.
Delete Entities You can delete entities by their name along with all associated relationships: ```python # Delete entity and all its relationships (synchronous version) rag.delete_by_entity("Google") # Asynchronous version await rag.adelete_by_entity("Google") ``` When deleting an entity: - Removes the entity node from the knowledge graph - Deletes all associated relationships - Removes related embedding vectors from the vector database - Maintains knowledge graph integrity
Delete Relations You can delete relationships between two specific entities: ```python # Delete relationship between two entities (synchronous version) rag.delete_by_relation("Google", "Gmail") # Asynchronous version await rag.adelete_by_relation("Google", "Gmail") ``` When deleting a relationship: - Removes the specified relationship edge - Deletes the relationship's embedding vector from the vector database - Preserves both entity nodes and their other relationships
Delete by Document ID You can delete an entire document and all its related knowledge through document ID: ```python # Delete by document ID (asynchronous version) await rag.adelete_by_doc_id("doc-12345") ``` Optimized processing when deleting by document ID: - **Smart Cleanup**: Automatically identifies and removes entities and relationships that belong only to this document - **Preserve Shared Knowledge**: If entities or relationships exist in other documents, they are preserved and their descriptions are rebuilt - **Cache Optimization**: Clears related LLM cache to reduce storage overhead - **Incremental Rebuilding**: Reconstructs affected entity and relationship descriptions from remaining documents The deletion process includes: 1. Delete all text chunks related to the document 2. Identify and delete entities and relationships that belong only to this document 3. Rebuild entities and relationships that still exist in other documents 4. Update all related vector indexes 5. Clean up document status records Note: Deletion by document ID is an asynchronous operation as it involves complex knowledge graph reconstruction processes.
**Important Reminders:** 1. **Irreversible Operations**: All deletion operations are irreversible, please use with caution 2. **Performance Considerations**: Deleting large amounts of data may take some time, especially deletion by document ID 3. **Data Consistency**: Deletion operations automatically maintain consistency between the knowledge graph and vector database 4. **Backup Recommendations**: Consider backing up data before performing important deletion operations **Batch Deletion Recommendations:** - For batch deletion operations, consider using asynchronous methods for better performance - For large-scale deletions, consider processing in batches to avoid excessive system load ## Entity Merging
Merge Entities and Their Relationships LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships: ```python # Basic entity merging rag.merge_entities( source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"], target_entity="AI Technology" ) ``` With custom merge strategy: ```python # Define custom merge strategy for different fields rag.merge_entities( source_entities=["John Smith", "Dr. Smith", "J. Smith"], target_entity="John Smith", merge_strategy={ "description": "concatenate", # Combine all descriptions "entity_type": "keep_first", # Keep the entity type from the first entity "source_id": "join_unique" # Combine all unique source IDs } ) ``` With custom target entity data: ```python # Specify exact values for the merged entity rag.merge_entities( source_entities=["New York", "NYC", "Big Apple"], target_entity="New York City", target_entity_data={ "entity_type": "LOCATION", "description": "New York City is the most populous city in the United States.", } ) ``` Advanced usage combining both approaches: ```python # Merge company entities with both strategy and custom data rag.merge_entities( source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"], target_entity="Microsoft", merge_strategy={ "description": "concatenate", # Combine all descriptions "source_id": "join_unique" # Combine source IDs }, target_entity_data={ "entity_type": "ORGANIZATION", } ) ``` When merging entities: * All relationships from source entities are redirected to the target entity * Duplicate relationships are intelligently merged * Self-relationships (loops) are prevented * Source entities are removed after merging * Relationship weights and attributes are preserved
## Multimodal Document Processing (RAG-Anything Integration) LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything), a comprehensive **All-in-One Multimodal Document Processing RAG system** built specifically for LightRAG. RAG-Anything enables advanced parsing and retrieval-augmented generation (RAG) capabilities, allowing you to handle multimodal documents seamlessly and extract structured content—including text, images, tables, and formulas—from various document formats for integration into your RAG pipeline. **Key Features:** - **End-to-End Multimodal Pipeline**: Complete workflow from document ingestion and parsing to intelligent multimodal query answering - **Universal Document Support**: Seamless processing of PDFs, Office documents (DOC/DOCX/PPT/PPTX/XLS/XLSX), images, and diverse file formats - **Specialized Content Analysis**: Dedicated processors for images, tables, mathematical equations, and heterogeneous content types - **Multimodal Knowledge Graph**: Automatic entity extraction and cross-modal relationship discovery for enhanced understanding - **Hybrid Intelligent Retrieval**: Advanced search capabilities spanning textual and multimodal content with contextual understanding **Quick Start:** 1. Install RAG-Anything: ```bash pip install raganything ``` 2. Process multimodal documents:
RAGAnything Usage Example ```python import asyncio from raganything import RAGAnything from lightrag import LightRAG from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc import os async def load_existing_lightrag(): # First, create or load an existing LightRAG instance lightrag_working_dir = "./existing_lightrag_storage" # Check if previous LightRAG instance exists if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir): print("✅ Found existing LightRAG instance, loading...") else: print("❌ No existing LightRAG instance found, will create new one") from functools import partial # Create/Load LightRAG instance with your configurations lightrag_instance = LightRAG( working_dir=lightrag_working_dir, llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key="your-api-key", **kwargs, ), embedding_func=EmbeddingFunc( embedding_dim=3072, max_token_size=8192, model="text-embedding-3-large", func=partial( openai_embed.func, # Use .func to access the unwrapped function model="text-embedding-3-large", api_key=api_key, base_url=base_url, ), ) ) # Initialize storage (this will load existing data if available) await lightrag_instance.initialize_storages() # Now initialize RAGAnything with the existing LightRAG instance rag = RAGAnything( lightrag=lightrag_instance, # Pass the existing LightRAG instance # Only need vision model for multimodal processing vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache( "gpt-4o", "", system_prompt=None, history_messages=[], messages=[ {"role": "system", "content": system_prompt} if system_prompt else None, {"role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}} 
]} if image_data else {"role": "user", "content": prompt} ], api_key="your-api-key", **kwargs, ) if image_data else openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key="your-api-key", **kwargs, ) # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance ) # Query the existing knowledge base result = await rag.query_with_multimodal( "What data has been processed in this LightRAG instance?", mode="hybrid" ) print("Query result:", result) # Add new multimodal documents to the existing LightRAG instance await rag.process_document_complete( file_path="path/to/new/multimodal_document.pdf", output_dir="./output" ) if __name__ == "__main__": asyncio.run(load_existing_lightrag()) ```
For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything). ## Token Usage Tracking
Overview and Usage LightRAG provides a TokenTracker tool to monitor and manage token consumption by large language models. This feature is particularly useful for controlling API costs and optimizing performance. ### Usage ```python from lightrag.utils import TokenTracker # Create TokenTracker instance token_tracker = TokenTracker() # Method 1: Using context manager (Recommended) # Suitable for scenarios requiring automatic token usage tracking with token_tracker: result1 = await llm_model_func("your question 1") result2 = await llm_model_func("your question 2") # Method 2: Manually adding token usage records # Suitable for scenarios requiring more granular control over token statistics token_tracker.reset() rag.insert() rag.query("your question 1", param=QueryParam(mode="naive")) rag.query("your question 2", param=QueryParam(mode="mix")) # Display total token usage (including insert and query operations) print("Token usage:", token_tracker.get_usage()) ``` ### Usage Tips - Use context managers for long sessions or batch operations to automatically track all token consumption - For scenarios requiring segmented statistics, use manual mode and call reset() when appropriate - Regular checking of token usage helps detect abnormal consumption early - Actively use this feature during development and testing to optimize production costs ### Practical Examples You can refer to these examples for implementing token tracking: - `examples/lightrag_gemini_track_token_demo.py`: Token tracking example using Google Gemini model - `examples/lightrag_siliconcloud_track_token_demo.py`: Token tracking example using SiliconCloud model These examples demonstrate how to effectively use the TokenTracker feature with different models and scenarios.
## Data Export Functions ### Overview LightRAG allows you to export your knowledge graph data in various formats for analysis, sharing, and backup purposes. The system supports exporting entities, relations, and relationship data. ### Export Functions
Basic Usage ```python # Basic CSV export (default format) rag.export_data("knowledge_graph.csv") # Specify any format rag.export_data("output.xlsx", file_format="excel") ```
Different File Formats supported ```python #Export data in CSV format rag.export_data("graph_data.csv", file_format="csv") # Export data in Excel sheet rag.export_data("graph_data.xlsx", file_format="excel") # Export data in markdown format rag.export_data("graph_data.md", file_format="md") # Export data in Text rag.export_data("graph_data.txt", file_format="txt") ```
Additional Options Include vector embeddings in the export (optional): ```python rag.export_data("complete_data.csv", include_vector_data=True) ```
### Data Included in Export All exports include: * Entity information (names, IDs, metadata) * Relation data (connections between entities) * Relationship information from vector database ## Cache
Clear Cache You can clear the configured LLM response cache storage with `aclear_cache()`. This API clears all cached entries in `llm_response_cache` and does not support selective cleanup by mode or cache type. ```python # Clear all cache await rag.aclear_cache() # Synchronous version rag.clear_cache() ``` For selective cleanup of query-related caches, use the `lightrag.tools.clean_llm_query_cache` tool and see the guide in [lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md](./lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md). It manages query caches and keywords caches for `mix`, `hybrid`, `local`, and `global` modes. It does not clean extraction caches such as `default:extract:*` and `default:summary:*`.
## Troubleshooting ### Common Initialization Errors If you encounter these errors when using LightRAG: 1. **`AttributeError: __aenter__`** - **Cause**: Storage backends not initialized - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance 2. **`KeyError: 'history_messages'`** - **Cause**: Pipeline status not initialized - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance 3. **Both errors in sequence** - **Cause**: Neither initialization method was called - **Solution**: Always follow this pattern: ```python rag = LightRAG(...) await rag.initialize_storages() ``` ### Model Switching Issues When switching between different embedding models, you must clear the data directory to avoid errors. The only file you may want to preserve is `kv_store_llm_response_cache.json` if you wish to retain the LLM cache. ## LightRAG API The LightRAG Server is designed to provide Web UI and API support. **For more information about LightRAG Server, please refer to [LightRAG Server](./lightrag/api/README.md).** ## Graph Visualization The LightRAG Server offers a comprehensive knowledge graph visualization feature. It supports various gravity layouts, node queries, subgraph filtering, and more. **For more information about LightRAG Server, please refer to [LightRAG Server](./lightrag/api/README.md).** ![iShot_2025-03-23_12.40.08](./README.assets/iShot_2025-03-23_12.40.08.png) ## Langfuse observability integration Langfuse provides a drop-in replacement for the OpenAI client that automatically tracks all LLM interactions, enabling developers to monitor, debug, and optimize their RAG systems without code changes. ### Installation with Langfuse option ``` pip install lightrag-hku pip install lightrag-hku[observability] # Or install from source code with debug mode enabled pip install -e . 
pip install -e ".[observability]" ``` ### Config Langfuse env vars modify .env file: ``` ## Langfuse Observability (Optional) # LLM observability and tracing platform # Install with: pip install lightrag-hku[observability] # Sign up at: https://cloud.langfuse.com or self-host LANGFUSE_SECRET_KEY="" LANGFUSE_PUBLIC_KEY="" LANGFUSE_HOST="https://cloud.langfuse.com" # or your self-hosted instance LANGFUSE_ENABLE_TRACE=true ``` ### Langfuse Usage Once installed and configured, Langfuse automatically traces all OpenAI LLM calls. Langfuse dashboard features include: - **Tracing**: View complete LLM call chains - **Analytics**: Token usage, latency, cost metrics - **Debugging**: Inspect prompts and responses - **Evaluation**: Compare model outputs - **Monitoring**: Real-time alerting ### Important Notice **Note**: LightRAG currently only integrates OpenAI-compatible API calls with Langfuse. APIs such as Ollama, Azure, and AWS Bedrock are not yet supported for Langfuse observability. ## RAGAS-based Evaluation **RAGAS** (Retrieval Augmented Generation Assessment) is a framework for reference-free evaluation of RAG systems using LLMs. There is an evaluation script based on RAGAS. For detailed information, please refer to [RAGAS-based Evaluation Framework](lightrag/evaluation/README_EVALUASTION_RAGAS.md). ## Evaluation ### Dataset The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain). ### Generate Query LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `examples/generate_query.py`.
Prompt ```python Given the following description of a dataset: {description} Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset. Output the results in the following structure: - User 1: [user description] - Task 1: [task description] - Question 1: - Question 2: - Question 3: - Question 4: - Question 5: - Task 2: [task description] ... - Task 5: [task description] - User 2: [user description] ... - User 5: [user description] ... ```
### Batch Eval To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `reproduce/batch_eval.py`.
Prompt ```python ---Role--- You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. ---Goal--- You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question? - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question? - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic? For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories. Here is the question: {query} Here are the two answers: **Answer 1:** {answer1} **Answer 2:** {answer2} Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. Output your evaluation in the following JSON format: {{ "Comprehensiveness": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]" }}, "Empowerment": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]" }}, "Overall Winner": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" }} }} ```
### Overall Performance Table ||**Agriculture**||**CS**||**Legal**||**Mix**|| |----------------------|---------------|------------|------|------------|---------|------------|-------|------------| ||NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**| |**Comprehensiveness**|32.4%|**67.6%**|38.4%|**61.6%**|16.4%|**83.6%**|38.8%|**61.2%**| |**Diversity**|23.6%|**76.4%**|38.0%|**62.0%**|13.6%|**86.4%**|32.4%|**67.6%**| |**Empowerment**|32.4%|**67.6%**|38.8%|**61.2%**|16.4%|**83.6%**|42.8%|**57.2%**| |**Overall**|32.4%|**67.6%**|38.8%|**61.2%**|15.2%|**84.8%**|40.0%|**60.0%**| ||RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**| |**Comprehensiveness**|31.6%|**68.4%**|38.8%|**61.2%**|15.2%|**84.8%**|39.2%|**60.8%**| |**Diversity**|29.2%|**70.8%**|39.2%|**60.8%**|11.6%|**88.4%**|30.8%|**69.2%**| |**Empowerment**|31.6%|**68.4%**|36.4%|**63.6%**|15.2%|**84.8%**|42.4%|**57.6%**| |**Overall**|32.4%|**67.6%**|38.0%|**62.0%**|14.4%|**85.6%**|40.0%|**60.0%**| ||HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**| |**Comprehensiveness**|26.0%|**74.0%**|41.6%|**58.4%**|26.8%|**73.2%**|40.4%|**59.6%**| |**Diversity**|24.0%|**76.0%**|38.8%|**61.2%**|20.0%|**80.0%**|32.4%|**67.6%**| |**Empowerment**|25.2%|**74.8%**|40.8%|**59.2%**|26.0%|**74.0%**|46.0%|**54.0%**| |**Overall**|24.8%|**75.2%**|41.6%|**58.4%**|26.4%|**73.6%**|42.4%|**57.6%**| ||GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**| |**Comprehensiveness**|45.6%|**54.4%**|48.4%|**51.6%**|48.4%|**51.6%**|**50.4%**|49.6%| |**Diversity**|22.8%|**77.2%**|40.8%|**59.2%**|26.4%|**73.6%**|36.0%|**64.0%**| |**Empowerment**|41.2%|**58.8%**|45.2%|**54.8%**|43.6%|**56.4%**|**50.8%**|49.2%| |**Overall**|45.2%|**54.8%**|48.0%|**52.0%**|47.2%|**52.8%**|**50.4%**|49.6%| ## Reproduce All the code can be found in the `./reproduce` directory. 
### Step-0 Extract Unique Contexts First, we need to extract unique contexts in the datasets.
Code ```python def extract_unique_contexts(input_directory, output_directory): os.makedirs(output_directory, exist_ok=True) jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl')) print(f"Found {len(jsonl_files)} JSONL files.") for file_path in jsonl_files: filename = os.path.basename(file_path) name, ext = os.path.splitext(filename) output_filename = f"{name}_unique_contexts.json" output_path = os.path.join(output_directory, output_filename) unique_contexts_dict = {} print(f"Processing file: {filename}") try: with open(file_path, 'r', encoding='utf-8') as infile: for line_number, line in enumerate(infile, start=1): line = line.strip() if not line: continue try: json_obj = json.loads(line) context = json_obj.get('context') if context and context not in unique_contexts_dict: unique_contexts_dict[context] = None except json.JSONDecodeError as e: print(f"JSON decoding error in file {filename} at line {line_number}: {e}") except FileNotFoundError: print(f"File not found: {filename}") continue except Exception as e: print(f"An error occurred while processing file {filename}: {e}") continue unique_contexts_list = list(unique_contexts_dict.keys()) print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.") try: with open(output_path, 'w', encoding='utf-8') as outfile: json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4) print(f"Unique `context` entries have been saved to: {output_filename}") except Exception as e: print(f"An error occurred while saving to the file {output_filename}: {e}") print("All files have been processed.") ```
### Step-1 Insert Contexts For the extracted contexts, we insert them into the LightRAG system.
Code ```python def insert_text(rag, file_path): with open(file_path, mode='r') as f: unique_contexts = json.load(f) retries = 0 max_retries = 3 while retries < max_retries: try: rag.insert(unique_contexts) break except Exception as e: retries += 1 print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}") time.sleep(10) if retries == max_retries: print("Insertion failed after exceeding the maximum number of retries") ```
### Step-2 Generate Queries We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries.
Code ```python tokenizer = GPT2Tokenizer.from_pretrained('gpt2') def get_summary(context, tot_tokens=2000): tokens = tokenizer.tokenize(context) half_tokens = tot_tokens // 2 start_tokens = tokens[1000:1000 + half_tokens] end_tokens = tokens[-(1000 + half_tokens):1000] summary_tokens = start_tokens + end_tokens summary = tokenizer.convert_tokens_to_string(summary_tokens) return summary ```
### Step-3 Query For the queries generated in Step-2, we will extract them and query LightRAG.
Code ```python def extract_queries(file_path): with open(file_path, 'r') as f: data = f.read() data = data.replace('**', '') queries = re.findall(r'- Question \d+: (.+)', data) return queries ```
## 🔗 Related Projects *Ecosystem & Extensions*
📸
RAG-Anything
Multimodal RAG
🎥
VideoRAG
Extreme Long-Context Video RAG
MiniRAG
Extremely Simple RAG
--- ## ⭐ Star History Star History Chart ## 🤝 Contribution
We thank all our contributors for their valuable contributions.
--- ## 📖 Citation ```python @article{guo2024lightrag, title={LightRAG: Simple and Fast Retrieval-Augmented Generation}, author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang}, year={2024}, eprint={2410.05779}, archivePrefix={arXiv}, primaryClass={cs.IR} } ``` ---
Thank you for visiting LightRAG!
================================================ FILE: SECURITY.md ================================================ # Reporting Security Issues The LightRAG team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions. To report a security issue, please use the GitHub Security Advisory: [Report a Vulnerability](https://github.com/HKUDS/LightRAG/security/advisories/new) The LightRAG team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance. Report security bugs in third-party modules to the person or team maintaining the module. ### Supported Versions The following versions are currently being supported with security updates. | Version | Supported | | ------- | ------------------ | | 1.2.x | :x: | | 1.3.x | :white_check_mark: | ================================================ FILE: config.ini.example ================================================ [neo4j] uri = neo4j+s://xxxxxxxx.databases.neo4j.io username = neo4j password = your-password connection_pool_size = 100 connection_timeout = 30.0 connection_acquisition_timeout = 30.0 max_transaction_retry_time = 30.0 max_connection_lifetime = 300.0 liveness_check_timeout = 30.0 keep_alive = true [mongodb] uri = mongodb+srv://name:password@your-cluster-address database = lightrag [redis] uri=redis://localhost:6379/1 [qdrant] uri = http://localhost:16333 [postgres] host = localhost port = 5432 user = your_username password = your_password database = your_database # workspace = default max_connections = 12 vector_index_type = HNSW # HNSW, IVFFLAT or VCHORDRQ hnsw_m = 16 hnsw_ef = 64 ivfflat_lists = 100 vchordrq_build_options = vchordrq_probes = vchordrq_epsilon = 1.9 [memgraph] uri = bolt://localhost:7687 [milvus] 
uri = http://localhost:19530 db_name = lightrag # user = root # password = your_password # token = your_token ================================================ FILE: docker-build-push.sh ================================================ #!/bin/bash set -e # Configuration IMAGE_NAME="ghcr.io/hkuds/lightrag" DOCKERFILE="Dockerfile" TAG="latest" # Get version from git tags VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "dev") echo "==================================" echo " Multi-Architecture Docker Build" echo "==================================" echo "Image: ${IMAGE_NAME}:${TAG}" echo "Version: ${VERSION}" echo "Platforms: linux/amd64, linux/arm64" echo "==================================" echo "" # Check Docker login status (skip if CR_PAT is set for CI/CD) if [ -z "$CR_PAT" ]; then if ! docker info 2>/dev/null | grep -q "Username"; then echo "⚠️ Warning: Not logged in to Docker registry" echo "Please login first: docker login ghcr.io" echo "Or set CR_PAT environment variable for automated login" echo "" read -p "Continue anyway? (y/n) " -n 1 -r echo if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1 fi fi else echo "Using CR_PAT environment variable for authentication" fi # Check if buildx builder exists, create if not if ! docker buildx ls | grep -q "desktop-linux"; then echo "Creating buildx builder..." docker buildx create --name desktop-linux --use docker buildx inspect --bootstrap else echo "Using existing buildx builder: desktop-linux" docker buildx use desktop-linux fi echo "" echo "Building and pushing multi-architecture image..." echo "" # Build and push docker buildx build \ --platform linux/amd64,linux/arm64 \ --file ${DOCKERFILE} \ --tag ${IMAGE_NAME}:${TAG} \ --tag ${IMAGE_NAME}:${VERSION} \ --push \ . echo "" echo "✓ Build and push complete!" echo "" echo "Images pushed:" echo " - ${IMAGE_NAME}:${TAG}" echo " - ${IMAGE_NAME}:${VERSION}" echo "" echo "Verifying multi-architecture manifest..." 
echo "" # Verify docker buildx imagetools inspect ${IMAGE_NAME}:${TAG} echo "" echo "✓ Verification complete!" echo "" echo "Pull with: docker pull ${IMAGE_NAME}:${TAG}" ================================================ FILE: docker-compose-full.yml ================================================ # Full Docker Compose Deployment Sample Generated by Setup Wizard: `make base` and `make storage` # This Sample File requires NVIDIA GPU for Milvus and VLLM services. # You can customize your setup using the Setup Wizard; for detailed instructions, please refer to docs/InteractiveSetup.md services: lightrag: image: ghcr.io/hkuds/lightrag:latest build: context: . dockerfile: Dockerfile tags: - ghcr.io/hkuds/lightrag:latest ports: - "${HOST:-0.0.0.0}:${PORT:-9621}:9621" volumes: - ./data/rag_storage:/app/data/rag_storage - ./data/inputs:/app/data/inputs - ./config.ini:/app/config.ini - ./.env:/app/.env deploy: restart_policy: condition: on-failure max_attempts: 10 extra_hosts: - "host.docker.internal:host-gateway" environment: MILVUS_URI: "http://milvus:19530" NEO4J_URI: "neo4j://neo4j:7687" POSTGRES_HOST: "postgres" POSTGRES_PORT: "5432" EMBEDDING_BINDING_HOST: "http://vllm-embed:8001/v1" RERANK_BINDING_HOST: "http://vllm-rerank:8000/rerank" WORKING_DIR: "/app/data/rag_storage" INPUT_DIR: "/app/data/inputs" MEMGRAPH_URI: "bolt://host.docker.internal:7687" HOST: "0.0.0.0" PORT: "9621" depends_on: vllm-embed: condition: service_healthy vllm-rerank: condition: service_healthy postgres: condition: service_healthy neo4j: condition: service_healthy milvus: condition: service_healthy milvus: image: milvusdb/milvus:v2.6.11-gpu command: ["milvus", "run", "standalone"] security_opt: - seccomp:unconfined environment: ETCD_ENDPOINTS: milvus-etcd:2379 MINIO_ADDRESS: milvus-minio:9000 MINIO_ACCESS_KEY_ID: "${MINIO_ACCESS_KEY_ID:?missing}" MINIO_SECRET_ACCESS_KEY: "${MINIO_SECRET_ACCESS_KEY:?missing}" # ports: # - "19530:19530" # - "9091:9091" volumes: - milvus_data:/var/lib/milvus 
deploy: resources: reservations: devices: - driver: nvidia capabilities: ["gpu"] healthcheck: test: - CMD-SHELL - 'PORT_HEX="$(printf ''%04X'' 19530)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "' interval: 10s timeout: 3s retries: 120 start_period: 10s depends_on: milvus-etcd: condition: service_healthy milvus-minio: condition: service_healthy restart: unless-stopped milvus-etcd: image: quay.io/coreos/etcd:v3.5.25 environment: ETCD_AUTO_COMPACTION_MODE: revision ETCD_AUTO_COMPACTION_RETENTION: "1000" ETCD_QUOTA_BACKEND_BYTES: "4294967296" ETCD_SNAPSHOT_COUNT: "50000" volumes: - milvus-etcd_data:/etcd command: > etcd -advertise-client-urls=http://0.0.0.0:2379 -listen-client-urls=http://0.0.0.0:2379 -data-dir /etcd healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] interval: 20s timeout: 20s retries: 3 restart: unless-stopped milvus-minio: image: minio/minio:RELEASE.2025-09-07T16-13-09Z environment: MINIO_ROOT_USER: "${MINIO_ACCESS_KEY_ID:?missing}" MINIO_ROOT_PASSWORD: "${MINIO_SECRET_ACCESS_KEY:?missing}" volumes: - milvus-minio_data:/minio_data command: minio server /minio_data --console-address ":9001" healthcheck: test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] interval: 30s timeout: 20s retries: 3 restart: unless-stopped neo4j: image: neo4j:5-community # ports: # - "7474:7474" # - "${NEO4J_BOLT_PORT:-7687}:7687" volumes: - neo4j_data:/data healthcheck: test: - CMD-SHELL - 'PORT_HEX="$(printf ''%04X'' 7687)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "' interval: 10s timeout: 3s retries: 120 start_period: 10s restart: unless-stopped environment: NEO4J_AUTH: ${NEO4J_USERNAME:?missing}/${NEO4J_PASSWORD:?missing} NEO4J_dbms_default__database: "neo4j" postgres: image: gzdaniel/postgres-for-rag:16.6 command: ["sh", "-c", "service postgresql start && sleep infinity"] # ports: # - "5432:5432" volumes: - postgres_data:/var/lib/postgresql healthcheck: test: - CMD-SHELL - 
'PORT_HEX="$(printf ''%04X'' 5432)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "' interval: 5s timeout: 3s retries: 120 start_period: 10s restart: unless-stopped environment: # The custom image featuring pre-installed AGE and pgvector extensions, including a pre-configured administrator account POSTGRES_USER: "rag" POSTGRES_PASSWORD: "rag" POSTGRES_DB: "rag" vllm-embed: image: vllm/vllm-openai:latest runtime: nvidia command: > --model ${VLLM_EMBED_MODEL:-BAAI/bge-m3} --port ${VLLM_EMBED_PORT:-8001} --dtype float16 --api-key ${VLLM_EMBED_API_KEY} ${VLLM_EMBED_EXTRA_ARGS:-} environment: NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all} NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility} ports: - "${VLLM_EMBED_PORT:-8001}:${VLLM_EMBED_PORT:-8001}" volumes: - vllm_embed_cache:/root/.cache/huggingface ipc: host healthcheck: test: - CMD-SHELL - 'PORT_HEX="$(printf ''%04X'' ${VLLM_EMBED_PORT:-8001})"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "' interval: 5s timeout: 3s retries: 120 start_period: 10s deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] restart: unless-stopped vllm-rerank: image: vllm/vllm-openai:latest runtime: nvidia command: > --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3} --port ${VLLM_RERANK_PORT:-8000} --dtype float16 --api-key ${VLLM_RERANK_API_KEY} ${VLLM_RERANK_EXTRA_ARGS:-} environment: NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all} NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility} ports: - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}" volumes: - vllm_rerank_cache:/root/.cache/huggingface ipc: host healthcheck: test: - CMD-SHELL - 'PORT_HEX="$(printf ''%04X'' ${VLLM_RERANK_PORT:-8000})"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "' interval: 5s timeout: 3s retries: 120 start_period: 10s deploy: resources: reservations: devices: - driver: nvidia count: 
all capabilities: [gpu] restart: unless-stopped volumes: milvus_data: milvus-etcd_data: milvus-minio_data: neo4j_data: postgres_data: vllm_embed_cache: vllm_rerank_cache: ================================================ FILE: docker-compose.yml ================================================ services: lightrag: image: ghcr.io/hkuds/lightrag:latest build: context: . dockerfile: Dockerfile tags: - ghcr.io/hkuds/lightrag:latest ports: - "${HOST:-0.0.0.0}:${PORT:-9621}:9621" volumes: - ./data/rag_storage:/app/data/rag_storage - ./data/inputs:/app/data/inputs - ./config.ini:/app/config.ini - ./.env:/app/.env deploy: restart_policy: condition: on-failure max_attempts: 10 extra_hosts: - "host.docker.internal:host-gateway" environment: WORKING_DIR: "/app/data/rag_storage" INPUT_DIR: "/app/data/inputs" HOST: "0.0.0.0" PORT: "9621" ================================================ FILE: docs/Algorithm.md ================================================ ![LightRAG Indexing Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg) *Figure 1: LightRAG Indexing Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)* ![LightRAG Retrieval and Querying Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg) *Figure 2: LightRAG Retrieval and Querying Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)* ================================================ FILE: docs/DockerDeployment.md ================================================ # LightRAG Docker Deployment A lightweight Knowledge Graph Retrieval-Augmented Generation system with multiple LLM backend support. 
## 🚀 Preparation ### Clone the repository: ```bash # Linux/MacOS git clone https://github.com/HKUDS/LightRAG.git cd LightRAG ``` ```powershell # Windows PowerShell git clone https://github.com/HKUDS/LightRAG.git cd LightRAG ``` ### Configure your environment: ```bash # Linux/MacOS cp env.example .env # Edit .env with your preferred configuration ``` ```powershell # Windows PowerShell Copy-Item env.example .env # Edit .env with your preferred configuration ``` LightRAG can be configured using environment variables in the `.env` file: **Server Configuration** - `HOST`: Server host (default: 0.0.0.0) - `PORT`: Server port (default: 9621) **LLM Configuration** - `LLM_BINDING`: LLM backend to use (lollms/ollama/openai) - `LLM_BINDING_HOST`: LLM server host URL - `LLM_MODEL`: Model name to use **Embedding Configuration** - `EMBEDDING_BINDING`: Embedding backend (lollms/ollama/openai) - `EMBEDDING_BINDING_HOST`: Embedding server host URL - `EMBEDDING_MODEL`: Embedding model name **RAG Configuration** - `MAX_ASYNC`: Maximum async operations - `MAX_TOKENS`: Maximum token size - `EMBEDDING_DIM`: Embedding dimensions ## 🐳 Docker Deployment Docker instructions work the same on all platforms with Docker Desktop installed. ### Build Optimization The Dockerfile uses BuildKit cache mounts to significantly improve build performance: - **Automatic cache management**: BuildKit is automatically enabled via `# syntax=docker/dockerfile:1` directive - **Faster rebuilds**: Only downloads changed dependencies when `uv.lock` or `bun.lock` files are modified - **Efficient package caching**: UV and Bun package downloads are cached across builds - **No manual configuration needed**: Works out of the box in Docker Compose and GitHub Actions ### Start LightRAG server: ```bash docker compose up -d ``` If you used the interactive setup, start the generated stack with: ```bash docker compose -f docker-compose.final.yml up -d ``` The interactive setup keeps `.env` host-usable. 
Container-only hostnames such as `postgres` or `host.docker.internal`, along with staged SSL paths under `/app/data/certs/`, are injected into the generated `docker-compose.final.yml` for the `lightrag` service instead of being persisted back into `.env`. On reruns, unchanged wizard-managed service blocks in `docker-compose.final.yml` are preserved by default. To repair or fully regenerate those managed blocks from the bundled templates, rerun the matching setup target with `make env-base-rewrite` or `make env-storage-rewrite`. If the generated stack includes local Milvus, compose resolves `MINIO_ACCESS_KEY_ID` and `MINIO_SECRET_ACCESS_KEY` at startup from the repo `.env` or exported shell environment. The generated compose file does not snapshot those values, and `docker compose` exits immediately if either variable is missing. Before exposing the generated stack beyond localhost, run: ```bash make env-security-check ``` That command audits the current `.env` for missing authentication, unsafe whitelist settings, weak JWT secrets, and other setup-level security risks without rewriting any files. LightRAG Server uses the following paths for data storage: ``` data/ ├── rag_storage/ # RAG data persistence └── inputs/ # Input documents ``` ### Optional: local vLLM embedding and reranker To run embedding and/or reranking locally with vLLM, run `make env-base` and answer `yes` when prompted to run the embedding model and rerank service locally via Docker. That configures the embedding service to use `BAAI/bge-m3` on port 8001 with a local vLLM server, and can also add a `vllm-rerank` service on port 8000. Alternatively, rerun `make env-base` later and enable only the rerank Docker prompt to add the `vllm-rerank` service automatically. vLLM provides a `v1/rerank` endpoint that works with the `cohere` binding. 
Example `docker-compose.override.yml` for GPU hosts (embedding + reranker): ```yaml services: vllm-embed: image: vllm/vllm-openai:latest runtime: nvidia command: > --model BAAI/bge-m3 --port 8001 --dtype float16 ports: - "8001:8001" volumes: - ./data/hf-cache:/root/.cache/huggingface ipc: host deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] vllm-rerank: image: vllm/vllm-openai:latest runtime: nvidia command: > --model BAAI/bge-reranker-v2-m3 --port 8000 --dtype float16 ports: - "8000:8000" volumes: - ./data/hf-cache:/root/.cache/huggingface ipc: host deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] ``` For CPU-only hosts, use the official CPU image instead: ```yaml services: vllm-embed: image: vllm/vllm-openai-cpu:latest command: > --model BAAI/bge-m3 --port 8001 --dtype float32 ports: - "8001:8001" volumes: - ./data/hf-cache:/root/.cache/huggingface vllm-rerank: image: vllm/vllm-openai-cpu:latest command: > --model BAAI/bge-reranker-v2-m3 --port 8000 --dtype float32 ports: - "8000:8000" volumes: - ./data/hf-cache:/root/.cache/huggingface ``` Add the embedding and rerank config to `.env`: ```bash EMBEDDING_BINDING=openai EMBEDDING_MODEL=BAAI/bge-m3 EMBEDDING_DIM=1024 EMBEDDING_BINDING_HOST=http://localhost:8001/v1 EMBEDDING_BINDING_API_KEY=local-key VLLM_EMBED_DEVICE=cpu RERANK_BINDING=cohere RERANK_MODEL=BAAI/bge-reranker-v2-m3 RERANK_BINDING_HOST=http://localhost:8000/rerank RERANK_BINDING_API_KEY=local-key VLLM_RERANK_DEVICE=cpu ``` If LightRAG runs in Docker while vLLM runs on the host, the generated compose file rewrites those endpoints to: ```bash EMBEDDING_BINDING_HOST=http://host.docker.internal:8001/v1 RERANK_BINDING_HOST=http://host.docker.internal:8000/rerank ``` For GPU, set: ```bash VLLM_EMBED_DEVICE=cuda VLLM_RERANK_DEVICE=cuda ``` Ensure the NVIDIA Container Toolkit is installed and the host has CUDA drivers available. 
The setup wizard uses the CPU image by default for `cpu` device and the GPU image for `cuda` device. When rerunning `make env-base`, an existing `VLLM_EMBED_DEVICE` / `VLLM_RERANK_DEVICE` value is preserved instead of being overwritten by a fresh GPU auto-detection result. Those templates already pin the matching vLLM `--dtype` (`float32` on CPU, `float16` on CUDA), so no separate `VLLM_*_DTYPE` environment variables are needed. ### SSL certificates The setup wizard stages TLS certificate files under `./data/certs/` before generating the compose file. This keeps generated host mounts under the same `./data` root used by the default Docker deployment. ### PostgreSQL image The interactive setup defaults PostgreSQL to `gzdaniel/postgres-for-rag:16.6`. That image bundles both Apache AGE and pgvector so the generated stack works with `PGGraphStorage` and `PGVectorStorage` without extra extension setup. ### Updates To update the Docker container: ```bash docker compose pull docker compose down docker compose up ``` ### Offline deployment Software packages requiring `transformers`, `torch`, or `cuda` are not preinstalled in the Docker images. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, cannot be used in an offline environment. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service. ## 📦 Build Docker Images ### For local development and testing ```bash # Build and run with Docker Compose (BuildKit automatically enabled) docker compose up --build # Or explicitly enable BuildKit if needed DOCKER_BUILDKIT=1 docker compose up --build ``` **Note**: BuildKit is automatically enabled by the `# syntax=docker/dockerfile:1` directive in the Dockerfile, ensuring optimal caching performance. 
### For production release **multi-architecture build and push**: ```bash # Use the provided build script ./docker-build-push.sh ``` **The build script will**: - Check Docker registry login status - Create/use buildx builder automatically - Build for both AMD64 and ARM64 architectures - Push to GitHub Container Registry (ghcr.io) - Verify the multi-architecture manifest **Prerequisites**: Before building multi-architecture images, ensure you have: - Docker 20.10+ with Buildx support - Sufficient disk space (20GB+ recommended for offline image) - Registry access credentials (if pushing images) ================================================ FILE: docs/FrontendBuildGuide.md ================================================ # Frontend Build Guide ## Overview The LightRAG project includes a React-based WebUI frontend. This guide explains how frontend building works in different scenarios. ## Key Principle - **Git Repository**: Frontend build results are **NOT** included (kept clean) - **PyPI Package**: Frontend build results **ARE** included (ready to use) - **Build Tool**: **Bun** is recommended, but **Node.js/npm** is fully supported as a fallback ## Installation Scenarios ### 1. End Users (From PyPI) ✨ **Command:** ```bash pip install lightrag-hku[api] ``` **What happens:** - Frontend is already built and included in the package - No additional steps needed - Web interface works immediately --- ### 2. Development Mode (Recommended for Contributors) 🔧 **Command:** ```bash # Clone the repository git clone https://github.com/HKUDS/LightRAG.git cd LightRAG # Install in editable mode (no frontend build required yet) pip install -e ".[api]" # Build frontend when needed (can be done anytime) cd lightrag_webui bun install --frozen-lockfile bun run build cd .. 
``` **Advantages:** - Install first, build later (flexible workflow) - Changes take effect immediately (symlink mode) - Frontend can be rebuilt anytime without reinstalling **How it works:** - Creates symlinks to source directory - Frontend build output goes to `lightrag/api/webui/` - Changes are immediately visible in installed package --- ### 3. Normal Installation (Testing Package Build) 📦 **Command:** ```bash # Clone the repository git clone https://github.com/HKUDS/LightRAG.git cd LightRAG # ⚠️ MUST build frontend FIRST cd lightrag_webui bun install --frozen-lockfile bun run build cd .. # Now install pip install ".[api]" ``` **What happens:** - Frontend files are **copied** to site-packages - Post-build modifications won't affect installed package - Requires rebuild + reinstall to update **When to use:** - Testing complete installation process - Verifying package configuration - Simulating PyPI user experience --- ### 4. Creating Distribution Package 🚀 **Command:** ```bash # Build frontend first cd lightrag_webui bun install --frozen-lockfile --production bun run build cd .. # Create distribution packages python -m build # Output: dist/lightrag_hku-*.whl and dist/lightrag_hku-*.tar.gz ``` **What happens:** - `setup.py` checks if frontend is built - If missing, installation fails with helpful error message - Generated package includes all frontend files --- ## GitHub Actions (Automated Release) When creating a release on GitHub: 1. **Automatically builds frontend** using Bun 2. **Verifies** build completed successfully 3. **Creates Python package** with frontend included 4. 
**Publishes to PyPI** using existing trusted publisher setup **No manual intervention required!** --- ## Quick Reference | Scenario | Command | Frontend Required | Can Build After | |----------|---------|-------------------|-----------------| | From PyPI | `pip install lightrag-hku[api]` | Included | No (already installed) | | Development | `pip install -e ".[api]"` | No | ✅ Yes (anytime) | | Normal Install | `pip install ".[api]"` | ✅ Yes (before) | No (must reinstall) | | Create Package | `python -m build` | ✅ Yes (before) | N/A | --- ## Bun Installation If you don't have Bun installed: ```bash # macOS/Linux curl -fsSL https://bun.sh/install | bash # Windows powershell -c "irm bun.sh/install.ps1 | iex" ``` Official documentation: https://bun.sh --- ## File Structure ``` LightRAG/ ├── lightrag_webui/ # Frontend source code │ ├── src/ # React components │ ├── package.json # Dependencies │ └── vite.config.ts # Build configuration │ └── outDir: ../lightrag/api/webui # Build output │ ├── lightrag/ │ └── api/ │ └── webui/ # Frontend build output (gitignored) │ ├── index.html # Built files (after running bun run build) │ └── assets/ # Built assets │ ├── setup.py # Build checks ├── pyproject.toml # Package configuration └── .gitignore # Excludes lightrag/api/webui/* (except .gitkeep) ``` --- ## Troubleshooting ### Q: I installed in development mode but the web interface doesn't work **A:** Build the frontend: ```bash cd lightrag_webui && bun run build ``` ### Q: I built the frontend but it's not in my installed package **A:** You probably used `pip install .` after building. Either: - Use `pip install -e ".[api]"` for development - Or reinstall: `pip uninstall lightrag-hku && pip install ".[api]"` ### Q: Where are the built frontend files? **A:** In `lightrag/api/webui/` after running `bun run build` ### Q: Can I use npm or yarn instead of Bun? **A:** Yes. 
The build scripts (`dev`, `build`, `preview`, `lint`) are runtime-agnostic and work with both Bun and Node.js/npm: ```bash npm install npm run build ``` Bun is recommended for speed, but npm is fully supported. Tests (`bun test`) still require Bun. ### Q: Build fails with `Cannot find package '@/lib'` **A:** This was caused by `vite.config.ts` using a TypeScript path alias (`@/`) that only Bun could resolve at config load time. Update to the latest version where this is fixed with a relative import. --- ## Summary ✅ **PyPI users**: No action needed, frontend included ✅ **Developers**: Use `pip install -e ".[api]"`, build frontend when needed ✅ **CI/CD**: Automatic build in GitHub Actions ✅ **Git**: Frontend build output never committed For questions or issues, please open a GitHub issue. ================================================ FILE: docs/InteractiveSetup.md ================================================ # Interactive Setup Guide Use the interactive setup wizard when you want LightRAG to guide you through the configuration instead of editing `.env` by hand. The wizard is exposed through `make` targets: - `make env-base` - `make env-storage` - `make env-server` - `make env-validate` - `make env-security-check` - `make env-backup` - `make env-base-rewrite` - `make env-storage-rewrite` You do not need to call the underlying shell script directly. ## What This Wizard Is For The setup wizard helps you configure LightRAG in three parts: - `env-base` sets up the LLM, embedding model, and optional reranker. - `env-storage` adds or changes storage backends such as PostgreSQL, Neo4j, Redis, Milvus, Qdrant, MongoDB, or Memgraph. - `env-server` sets server host and port, WebUI labels, authentication, API keys, and SSL. You can rerun each step later. The wizard loads your existing `.env` and shows current values as defaults, so you only need to change what is different. ## Before You Start - Run commands from the repository root. 
- The `make env-*` targets automatically choose a compatible Bash 4+ interpreter. - Use the documented `make env-*` targets rather than invoking the setup script yourself. - `make env-base` is the normal starting point because it creates the initial `.env`. - `make env-storage` and `make env-server` require an existing `.env`. - If you choose any wizard-managed Docker service, the wizard also prepares LightRAG for the Docker startup path. ## Choose Your Setup Path Use this quick guide to decide what to run: - I want the fastest first run with remote model providers: `make env-base` - I want embedding or reranking to run locally in Docker: `make env-base` - I already configured models and now want databases: `make env-storage` - I already configured models and now want auth, API keys, or SSL: `make env-server` - I want to check whether my current setup is valid: `make env-validate` - I want to audit my current setup before exposing it: `make env-security-check` - I want a standalone backup without changing configuration: `make env-backup` - I need to repair the generated compose services from the bundled templates: `make env-base-rewrite` or `make env-storage-rewrite` ## Scenario 1: First-Time Local Setup Use this when you want LightRAG running with the least amount of setup and you already have remote model endpoints or API keys. 
**Command** ```bash make env-base ``` **What the wizard asks** - LLM provider, model, endpoint, and API key - Whether the embedding model should run locally via Docker - If embedding stays remote: embedding provider, model, dimension, endpoint, and API key - Whether reranking should be enabled - If reranking is enabled: whether the rerank service should run locally via Docker - If reranking stays remote: rerank provider, model, endpoint, and API key **What gets written** - `.env` - `docker-compose.final.yml` only if you enabled wizard-managed Docker services **What to do next** - If you did not enable wizard-managed Docker services: ```bash lightrag-server ``` - If you enabled wizard-managed Docker services: ```bash docker compose -f docker-compose.final.yml up -d ``` ## Scenario 2: Local Setup With Docker-Hosted Embedding or Rerank Use this when you want LightRAG to run local inference services for embedding and/or reranking through Docker. **Command** ```bash make env-base ``` **Recommended answers** - Answer `yes` to `Run embedding model locally via Docker (vLLM)?` if you want local embeddings - Answer `yes` to `Enable reranking?` and then `yes` to `Run rerank service locally via Docker?` if you want local reranking **What the wizard asks after you enable local services** - Embedding model name for local vLLM - Rerank model name for local vLLM - Remote LLM details if your main LLM is still external **What gets written** - `.env` - `docker-compose.final.yml` with the selected local services **What to do next** ```bash docker compose -f docker-compose.final.yml up -d ``` This starts the generated Docker-based LightRAG stack together with the selected local services. ## Scenario 3: Add Storage After The Base Setup Use this when you already have `.env` from `make env-base` and now want to switch from default local-file storage to database-backed storage. 
**Command** ```bash make env-storage ``` **Prerequisite** - `.env` must already exist **What the wizard asks** - KV storage backend - Vector storage backend - Graph storage backend - Doc-status storage backend - For each required database, whether it should run locally via Docker - For each required database, the needed connection details such as host, URI, port, user, password, database name, or device type **Important rule** - If you choose `MongoVectorDBStorage` for vector storage, the wizard does not offer the bundled local Docker MongoDB service. You must provide a MongoDB deployment that supports Atlas Search / Vector Search. **What gets written** - `.env` - `docker-compose.final.yml` if you selected wizard-managed storage services **What to do next** - If you selected Docker-managed storage services: ```bash docker compose -f docker-compose.final.yml up -d ``` - If you pointed LightRAG at external databases, make sure those services are reachable before starting LightRAG. ## Scenario 4: Harden A Deployment With Auth And SSL Use this when you already have `.env` and need to prepare the server for shared or external use. 
**Commands** ```bash make env-server make env-security-check ``` **Prerequisite** - `.env` must already exist **What `env-server` asks** - Server host and port - WebUI title and description - Summary language - Whether to configure authentication and API key settings - Auth accounts, JWT secret, token lifetime, API key, and whitelist paths - Whether to enable SSL/TLS - SSL certificate file path and SSL key file path **What gets written** - `.env` - `docker-compose.final.yml` may be updated if your current setup already uses wizard-managed Docker services **What to do next** - Run `make env-security-check` - If the stack uses Docker, recreate the LightRAG service with your compose file - If the stack runs on the host, restart `lightrag-server` For broader deployment guidance, see [DockerDeployment.md](./DockerDeployment.md). ## Validate, Audit, And Backup These commands do not walk you through a full setup flow, but they are part of normal operations. ### Validate The Current Configuration ```bash make env-validate ``` Use this when you want to confirm that the current `.env` is internally consistent. It reports problems such as missing required values, malformed auth settings, invalid URIs, invalid ports, or missing SSL files. ### Audit Security Before Exposure ```bash make env-security-check ``` Use this before exposing LightRAG beyond localhost. It reports risky setups such as missing authentication, weak or missing JWT secrets, unsafe whitelist settings, or unresolved sensitive placeholders. ### Create A Standalone Backup ```bash make env-backup ``` Use this when you want a manual backup without running any setup flow. ## Outputs And What They Mean ### `.env` The wizard writes `.env` in the repository root. This file becomes the current runtime configuration produced by the latest wizard run. 
In practice, this means: - rerunning the wizard updates `.env` - existing values are reused as defaults on later runs - you should treat `.env` as the active configuration for the workflow you most recently configured - before `env-base`, `env-storage`, or `env-server` writes `.env`, the wizard automatically creates a timestamped backup of the existing file when one is present ### `docker-compose.final.yml` The wizard creates or updates `docker-compose.final.yml` only when you choose wizard-managed Docker services or when an existing wizard-generated compose setup needs to stay aligned with new server settings. When one of the setup flows is about to replace or remove an existing generated compose file, it automatically creates a timestamped backup first. Use this file when starting the generated Docker stack: ```bash docker compose -f docker-compose.final.yml up -d ``` The base `docker-compose.yml` remains the general project compose file. The generated `docker-compose.final.yml` is the wizard-managed output. ## Troubleshooting And Advanced Notes - If `make env-storage` or `make env-server` says `.env` is missing, run `make env-base` first. - You do not need to run `make env-backup` before rerunning `env-base`, `env-storage`, or `env-server`; those flows already back up the existing `.env`, and they also back up the generated compose file before changing it. - If you need to fully rebuild wizard-managed compose services from the current bundled templates, use `make env-base-rewrite` or `make env-storage-rewrite`. - If you switch between host-oriented and Docker-oriented workflows, rerun the relevant setup step instead of trying to manually merge old settings. - If the generated stack includes local Milvus, make sure `MINIO_ACCESS_KEY_ID` and `MINIO_SECRET_ACCESS_KEY` are available before running `docker compose -f docker-compose.final.yml up -d`. 
- For Docker deployment details beyond the interactive wizard, see [DockerDeployment.md](./DockerDeployment.md). ## Typical Command Sequences ### Remote models, local server ```bash make env-base lightrag-server ``` ### Remote LLM, local embedding and rerank in Docker ```bash make env-base docker compose -f docker-compose.final.yml up -d ``` ### Add storage after the base setup ```bash make env-base make env-storage docker compose -f docker-compose.final.yml up -d ``` ### Add security and SSL before exposure ```bash make env-base make env-storage make env-server make env-security-check docker compose -f docker-compose.final.yml up -d ``` ================================================ FILE: docs/LightRAG_concurrent_explain.md ================================================ ## LightRAG Multi-Document Processing: Concurrent Control Strategy LightRAG employs a multi-layered concurrent control strategy when processing multiple documents. This article provides an in-depth analysis of the concurrent control mechanisms at document level, chunk level, and LLM request level, helping you understand why specific concurrent behaviors occur. ### 1. Document-Level Concurrent Control **Control Parameter**: `max_parallel_insert` This parameter controls the number of documents processed simultaneously. The purpose is to prevent excessive parallelism from overwhelming system resources, which could lead to extended processing times for individual files. Document-level concurrency is governed by the `max_parallel_insert` attribute within LightRAG, which defaults to 2 and is configurable via the `MAX_PARALLEL_INSERT` environment variable. `max_parallel_insert` is recommended to be set between 2 and 10, typically `llm_model_max_async/3`. Setting this value too high can increase the likelihood of naming conflicts among entities and relationships across different documents during the merge phase, thereby reducing its overall efficiency. ### 2. 
Chunk-Level Concurrent Control **Control Parameter**: `llm_model_max_async` This parameter controls the number of chunks processed simultaneously in the extraction stage within a document. The purpose is to prevent a high volume of concurrent requests from monopolizing LLM processing resources, which would impede the efficient parallel processing of multiple files. Chunk-Level Concurrent Control is governed by the `llm_model_max_async` attribute within LightRAG, which defaults to 4 and is configurable via the `MAX_ASYNC` environment variable. The purpose of this parameter is to fully leverage the LLM's concurrency capabilities when processing individual documents. In the `extract_entities` function, **each document independently creates** its own chunk semaphore. Since each document independently creates chunk semaphores, the theoretical chunk concurrency of the system is: $$ \text{Chunk Concurrency} = \text{Max Parallel Insert} \times \text{LLM Model Max Async} $$ For example: - `max_parallel_insert = 2` (process 2 documents simultaneously) - `llm_model_max_async = 4` (at most 4 concurrent chunks per document) - Theoretical chunk-level concurrency: 2 × 4 = 8 ### 3. Graph-Level Concurrent Control **Control Parameter**: `llm_model_max_async * 2` This parameter controls the number of entities and relations processed simultaneously in the merging stage within a document. The purpose is to prevent a high volume of concurrent requests from monopolizing LLM processing resources, which would impede the efficient parallel processing of multiple files. Graph-level concurrency is governed by the `llm_model_max_async` attribute within LightRAG, which defaults to 4 and is configurable via the `MAX_ASYNC` environment variable. Graph-level parallelism control parameters are equally applicable to managing parallelism during the entity relationship reconstruction phase after document deletion. 
Given that the entity relationship merging phase doesn't necessitate LLM interaction for every operation, its parallelism is set at double the LLM's parallelism. This optimizes machine utilization while preventing excessive queuing and resource contention for the LLM. ### 4. LLM-Level Concurrent Control **Control Parameter**: `llm_model_max_async` This parameter governs the **concurrent volume** of LLM requests dispatched by the entire LightRAG system, encompassing the document extraction stage, merging stage, and user query handling. LLM request prioritization is managed via a global priority queue, which **systematically prioritizes user queries** over merging-related requests, and merging-related requests over extraction-related requests. This strategic prioritization **minimizes user query latency**. LLM-level concurrency is governed by the `llm_model_max_async` attribute within LightRAG, which defaults to 4 and is configurable via the `MAX_ASYNC` environment variable. ### 5. Complete Concurrent Hierarchy Diagram ```mermaid graph TD classDef doc fill:#e6f3ff,stroke:#5b9bd5,stroke-width:2px; classDef chunk fill:#fbe5d6,stroke:#ed7d31,stroke-width:1px; classDef merge fill:#e2f0d9,stroke:#70ad47,stroke-width:2px; A["Multiple Documents<br>max_parallel_insert = 2"] --> A1 A --> B1 A1[DocA: split to n chunks] --> A_chunk; B1[DocB: split to m chunks] --> B_chunk; subgraph A_chunk[Extraction Stage] A_chunk_title[Entity Relation Extraction<br>llm_model_max_async = 4]; A_chunk_title --> A_chunk1[Chunk A1]:::chunk; A_chunk_title --> A_chunk2[Chunk A2]:::chunk; A_chunk_title --> A_chunk3[Chunk A3]:::chunk; A_chunk_title --> A_chunk4[Chunk A4]:::chunk; A_chunk1 & A_chunk2 & A_chunk3 & A_chunk4 --> A_chunk_done([Extraction Complete]); end subgraph B_chunk[Extraction Stage] B_chunk_title[Entity Relation Extraction<br>llm_model_max_async = 4]; B_chunk_title --> B_chunk1[Chunk B1]:::chunk; B_chunk_title --> B_chunk2[Chunk B2]:::chunk; B_chunk_title --> B_chunk3[Chunk B3]:::chunk; B_chunk_title --> B_chunk4[Chunk B4]:::chunk; B_chunk1 & B_chunk2 & B_chunk3 & B_chunk4 --> B_chunk_done([Extraction Complete]); end A_chunk -.->|LLM Request| LLM_Queue; A_chunk --> A_merge; B_chunk --> B_merge; subgraph A_merge[Merge Stage] A_merge_title[Entity Relation Merging<br>llm_model_max_async * 2 = 8]; A_merge_title --> A1_entity[Ent a1]:::merge; A_merge_title --> A2_entity[Ent a2]:::merge; A_merge_title --> A3_entity[Rel a3]:::merge; A_merge_title --> A4_entity[Rel a4]:::merge; A1_entity & A2_entity & A3_entity & A4_entity --> A_done([Merge Complete]) end subgraph B_merge[Merge Stage] B_merge_title[Entity Relation Merging<br>llm_model_max_async * 2 = 8]; B_merge_title --> B1_entity[Ent b1]:::merge; B_merge_title --> B2_entity[Ent b2]:::merge; B_merge_title --> B3_entity[Rel b3]:::merge; B_merge_title --> B4_entity[Rel b4]:::merge; B1_entity & B2_entity & B3_entity & B4_entity --> B_done([Merge Complete]) end A_merge -.->|LLM Request| LLM_Queue["LLM Request Prioritized Queue<br>llm_model_max_async = 4"]; B_merge -.->|LLM Request| LLM_Queue; B_chunk -.->|LLM Request| LLM_Queue; ``` > The extraction and merge stages share a global prioritized LLM queue, regulated by `llm_model_max_async`. While numerous entity and relation extraction and merging operations may be "actively processing", **only a limited number will concurrently execute LLM requests**; the remainder are queued, awaiting their turn. ### 6. Performance Optimization Recommendations * **Increase LLM Concurrent Setting based on the capabilities of your LLM server or API provider** During the file processing phase, the performance and concurrency capabilities of the LLM are critical bottlenecks. When deploying LLMs locally, the service's concurrency capacity must adequately account for the context length requirements of LightRAG. LightRAG recommends that LLMs support a minimum context length of 32KB; therefore, server concurrency should be calculated based on this benchmark. For API providers, LightRAG will retry requests up to three times if the client's request is rejected due to concurrent request limits. Backend logs can be used to determine if LLM retries are occurring, thereby indicating whether `MAX_ASYNC` has exceeded the API provider's limits. * **Align Parallel Document Insertion Settings with LLM Concurrency Configurations** The recommended number of parallel document processing tasks is 1/4 of the LLM's concurrency, with a minimum of 2 and a maximum of 10. Setting a higher number of parallel document processing tasks typically does not accelerate overall document processing speed, as even a small number of concurrently processed documents can fully utilize the LLM's parallel processing capabilities. Excessive parallel document processing can significantly increase the processing time for each individual document. 
Since LightRAG commits processing results on a file-by-file basis, a large number of concurrent files would necessitate caching a substantial amount of data. In the event of a system error, all documents in the middle stage would require reprocessing, thereby increasing error handling costs. For instance, setting `MAX_PARALLEL_INSERT` to 3 is appropriate when `MAX_ASYNC` is configured to 12. ================================================ FILE: docs/MilvusConfigurationGuide.md ================================================ # Milvus Configuration via vector_db_storage_cls_kwargs ## Overview Milvus index parameters can be configured through `vector_db_storage_cls_kwargs`, which is the **recommended approach** for framework integration scenarios (e.g., when using RAGAnything or other frameworks built on top of LightRAG). ## Why Use vector_db_storage_cls_kwargs? ✅ **Framework Integration**: Allows configuration to be passed through framework layers without environment variable changes ✅ **Programmatic Configuration**: Set parameters in code rather than relying on environment variables ✅ **Dynamic Configuration**: Different configurations for different RAG instances ✅ **Clean API**: All parameters passed in one place during initialization ## Supported Parameters All 11 MilvusIndexConfig parameters can be configured via `vector_db_storage_cls_kwargs`: ### Base Configuration - `index_type`: Index type (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.) 
- `metric_type`: Distance metric (COSINE, L2, IP) ### HNSW Parameters - `hnsw_m`: Number of connections per layer (2-2048, default: 16) - `hnsw_ef_construction`: Size of dynamic candidate list during construction (default: 360) - `hnsw_ef`: Size of dynamic candidate list during search (default: 200) ### HNSW_SQ Parameters (requires Milvus 2.6.8+) - `sq_type`: Quantization type (SQ4U, SQ6, SQ8, BF16, FP16, default: SQ8) - `sq_refine`: Enable refinement (default: False) - `sq_refine_type`: Refinement type (SQ6, SQ8, BF16, FP16, FP32, default: FP32) - `sq_refine_k`: Number of candidates to refine (default: 10) ### IVF Parameters - `ivf_nlist`: Number of cluster units (1-65536, default: 1024) - `ivf_nprobe`: Number of units to query (default: 16) ## Configuration Priority Configuration is resolved in the following order: 1. **Parameters passed via vector_db_storage_cls_kwargs** (highest priority) 2. Environment variables (MILVUS_INDEX_TYPE, etc.) 3. Default values ## Usage Examples ### Basic Configuration ```python from lightrag import LightRAG rag = LightRAG( working_dir="./demo", vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": 0.2, "index_type": "HNSW", "metric_type": "COSINE", "hnsw_m": 32, "hnsw_ef_construction": 256, "hnsw_ef": 150, } ) ``` ### RAGAnything Framework Integration ```python # In RAGAnything framework code: def create_lightrag_instance(user_config): """Create LightRAG instance with user-provided Milvus configuration""" # User configuration from RAGAnything milvus_config = { "cosine_better_than_threshold": user_config.get("threshold", 0.2), "index_type": user_config.get("index_type", "HNSW"), "hnsw_m": user_config.get("hnsw_m", 32), # ... 
other parameters } # Pass configuration to LightRAG rag = LightRAG( working_dir=user_config["working_dir"], vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs=milvus_config, ) return rag ``` ### Advanced Configuration with HNSW_SQ ```python rag = LightRAG( working_dir="./demo", vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": 0.2, "index_type": "HNSW_SQ", # Requires Milvus 2.6.8+ "metric_type": "COSINE", "hnsw_m": 48, "hnsw_ef_construction": 400, "hnsw_ef": 200, "sq_type": "SQ8", "sq_refine": True, "sq_refine_type": "FP32", "sq_refine_k": 20, } ) ``` ### IVF Configuration ```python rag = LightRAG( working_dir="./demo", vector_storage="MilvusVectorDBStorage", vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": 0.2, "index_type": "IVF_FLAT", "metric_type": "L2", "ivf_nlist": 2048, "ivf_nprobe": 32, } ) ``` ## Implementation Details ### How It Works 1. When `MilvusVectorDBStorage.__post_init__()` is called: ```python kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) index_config_keys = MilvusIndexConfig.get_config_field_names() index_config_params = { k: v for k, v in kwargs.items() if k in index_config_keys } self.index_config = MilvusIndexConfig(**index_config_params) ``` 2. `MilvusIndexConfig.get_config_field_names()` dynamically extracts all valid parameter names from the dataclass 3. Only valid Milvus index parameters are extracted from kwargs 4. Parameters are passed to `MilvusIndexConfig` which applies defaults and validates them 5. Environment variables are used as fallback for any parameters not provided in kwargs ### Automatic Synchronization The implementation uses `MilvusIndexConfig.get_config_field_names()` to dynamically extract valid parameters. 
This means: - ✅ New parameters added to `MilvusIndexConfig` are **automatically recognized** - ✅ No need to maintain duplicate parameter lists - ✅ Single source of truth for configuration parameters ## Testing The configuration via `vector_db_storage_cls_kwargs` is thoroughly tested: ```bash # Run all kwargs bridge tests python -m pytest tests/test_milvus_kwargs_bridge.py -v # Test RAGAnything integration scenario specifically python -m pytest tests/test_milvus_kwargs_bridge.py::TestMilvusKwargsParameterBridge::test_raganything_framework_integration_scenario -v # Test all parameters support python -m pytest tests/test_milvus_kwargs_bridge.py::TestMilvusKwargsParameterBridge::test_all_milvus_parameters_supported_via_kwargs -v ``` ## Examples See `examples/milvus_kwargs_configuration_demo.py` for a complete working example. ## Backward Compatibility ✅ **100% backward compatible** with existing code ✅ Environment variable configuration still works ✅ All existing tests pass ## FAQ ### Q: Can I mix kwargs and environment variables? **A:** Yes! Parameters in `vector_db_storage_cls_kwargs` take priority over environment variables. ### Q: What happens to non-Milvus parameters in kwargs? **A:** They are ignored. Only valid MilvusIndexConfig parameters are extracted. This allows frameworks to pass their own parameters alongside Milvus configuration. ### Q: Do I need to set environment variables? **A:** No! When using `vector_db_storage_cls_kwargs`, environment variables are optional. They serve as fallback values. ### Q: Is this approach recommended for RAGAnything? **A:** Yes! This is the **recommended approach** for any framework that builds on top of LightRAG, as it allows clean configuration passing through framework layers. 
## References - Test Suite: `tests/test_milvus_kwargs_bridge.py` - Implementation: `lightrag/kg/milvus_impl.py` (lines 1237-1272) - Example: `examples/milvus_kwargs_configuration_demo.py` - MilvusIndexConfig: `lightrag/kg/milvus_impl.py` (lines 75-303) ================================================ FILE: docs/OfflineDeployment.md ================================================ # LightRAG Offline Deployment Guide This guide provides comprehensive instructions for deploying LightRAG in offline environments where internet access is limited or unavailable. If you deploy LightRAG using Docker, there is no need to refer to this document, as the LightRAG Docker image is pre-configured for offline operation. > Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, are outside the scope of offline installation support. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service. ## Table of Contents - [Overview](#overview) - [Quick Start](#quick-start) - [Layered Dependencies](#layered-dependencies) - [Tiktoken Cache Management](#tiktoken-cache-management) - [Complete Offline Deployment Workflow](#complete-offline-deployment-workflow) - [Troubleshooting](#troubleshooting) ## Overview LightRAG uses dynamic package installation (`pipmaster`) for optional features based on file types and configurations. In offline environments, these dynamic installations will fail. This guide shows you how to pre-install all necessary dependencies and cache files. ### What Gets Dynamically Installed? 
LightRAG dynamically installs packages for: - **Storage Backends**: `redis`, `neo4j`, `pymilvus`, `pymongo`, `asyncpg`, `qdrant-client` - **LLM Providers**: `openai`, `anthropic`, `ollama`, `zhipuai`, `aioboto3`, `voyageai`, `llama-index`, `lmdeploy`, `transformers`, `torch` - **Tiktoken Models**: BPE encoding models downloaded from OpenAI CDN **Note**: Document processing dependencies (`pypdf`, `python-docx`, `python-pptx`, `openpyxl`) are now pre-installed with the `api` extras group and no longer require dynamic installation. ## Quick Start ### Option 1: Using pip with Offline Extras ```bash # Online environment: Install all offline dependencies pip install lightrag-hku[offline] # Download tiktoken cache lightrag-download-cache # Create offline package pip download lightrag-hku[offline] -d ./offline-packages tar -czf lightrag-offline.tar.gz ./offline-packages ~/.tiktoken_cache # Transfer to offline server scp lightrag-offline.tar.gz user@offline-server:/path/to/ # Offline environment: Install tar -xzf lightrag-offline.tar.gz pip install --no-index --find-links=./offline-packages lightrag-hku[offline] export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache ``` ### Option 2: Using Requirements Files ```bash # Online environment: Download packages pip download -r requirements-offline.txt -d ./packages # Transfer to offline server tar -czf packages.tar.gz ./packages scp packages.tar.gz user@offline-server:/path/to/ # Offline environment: Install tar -xzf packages.tar.gz pip install --no-index --find-links=./packages -r requirements-offline.txt ``` ## Layered Dependencies LightRAG provides flexible dependency groups for different use cases: ### Available Dependency Groups | Group | Description | Use Case | | ----- | ----------- | -------- | | `api` | API server + document processing | FastAPI server with PDF, DOCX, PPTX, XLSX support | | `offline-storage` | Storage backends | Redis, Neo4j, MongoDB, PostgreSQL, etc. 
| | `offline-llm` | LLM providers | OpenAI, Anthropic, Ollama, etc. | | `offline` | Complete offline package | API + Storage + LLM (all features) | **Note**: Document processing (PDF, DOCX, PPTX, XLSX) is included in the `api` extras group. The previous `offline-docs` group has been merged into `api` for better integration. > Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group. ### Installation Examples ```bash # Install API with document processing pip install lightrag-hku[api] # Install API and storage backends pip install lightrag-hku[api,offline-storage] # Install all offline dependencies (recommended for offline deployment) pip install lightrag-hku[offline] ``` ### Using Individual Requirements Files ```bash # Storage backends only pip install -r requirements-offline-storage.txt # LLM providers only pip install -r requirements-offline-llm.txt # All offline dependencies pip install -r requirements-offline.txt ``` ## Tiktoken Cache Management Tiktoken downloads BPE encoding models on first use. In offline environments, you must pre-download these models. 
### Using the CLI Command After installing LightRAG, use the built-in command: ```bash # Download to default location (see output for exact path) lightrag-download-cache # Download to specific directory lightrag-download-cache --cache-dir ./tiktoken_cache # Download specific models only lightrag-download-cache --models gpt-4o-mini gpt-4 ``` ### Default Models Downloaded - `gpt-4o-mini` (LightRAG default) - `gpt-4o` - `gpt-4` - `gpt-3.5-turbo` - `text-embedding-ada-002` - `text-embedding-3-small` - `text-embedding-3-large` ### Setting Cache Location in Offline Environment ```bash # Option 1: Environment variable (temporary) export TIKTOKEN_CACHE_DIR=/path/to/tiktoken_cache # Option 2: Add to ~/.bashrc or ~/.zshrc (persistent) echo 'export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache' >> ~/.bashrc source ~/.bashrc # Option 3: Copy to default location cp -r /path/to/tiktoken_cache ~/.tiktoken_cache/ ``` ## Complete Offline Deployment Workflow ### Step 1: Prepare in Online Environment ```bash # 1. Install LightRAG with offline dependencies pip install lightrag-hku[offline] # 2. Download tiktoken cache lightrag-download-cache --cache-dir ./offline_cache/tiktoken # 3. Download all Python packages pip download lightrag-hku[offline] -d ./offline_cache/packages # 4. Create archive for transfer tar -czf lightrag-offline-complete.tar.gz ./offline_cache # 5. Verify contents tar -tzf lightrag-offline-complete.tar.gz | head -20 ``` ### Step 2: Transfer to Offline Environment ```bash # Using scp scp lightrag-offline-complete.tar.gz user@offline-server:/tmp/ # Or using USB/physical media # Copy lightrag-offline-complete.tar.gz to USB drive ``` ### Step 3: Install in Offline Environment ```bash # 1. Extract archive cd /tmp tar -xzf lightrag-offline-complete.tar.gz # 2. Install Python packages pip install --no-index \ --find-links=/tmp/offline_cache/packages \ lightrag-hku[offline] # 3. 
Set up tiktoken cache mkdir -p ~/.tiktoken_cache cp -r /tmp/offline_cache/tiktoken/* ~/.tiktoken_cache/ export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache # 4. Add to shell profile for persistence echo 'export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache' >> ~/.bashrc ``` ### Step 4: Verify Installation ```bash # Test Python import python -c "from lightrag import LightRAG; print('✓ LightRAG imported')" # Test tiktoken python -c "from lightrag.utils import TiktokenTokenizer; t = TiktokenTokenizer(); print('✓ Tiktoken working')" # Test optional dependencies (if installed) python -c "import docling; print('✓ Docling available')" python -c "import redis; print('✓ Redis available')" ``` ## Troubleshooting ### Issue: Tiktoken fails with network error **Problem**: `Unable to load tokenizer for model gpt-4o-mini` **Solution**: ```bash # Ensure TIKTOKEN_CACHE_DIR is set echo $TIKTOKEN_CACHE_DIR # Verify cache files exist ls -la ~/.tiktoken_cache/ # If empty, you need to download cache in online environment first ``` ### Issue: Dynamic package installation fails **Problem**: `Error installing package xxx` **Solution**: ```bash # Pre-install the specific package you need # For API with document processing: pip install lightrag-hku[api] # For storage backends: pip install lightrag-hku[offline-storage] # For LLM providers: pip install lightrag-hku[offline-llm] ``` ### Issue: Missing dependencies at runtime **Problem**: `ModuleNotFoundError: No module named 'xxx'` **Solution**: ```bash # Check what you have installed pip list | grep -i xxx # Install missing component pip install lightrag-hku[offline] # Install all offline deps ``` ### Issue: Permission denied on tiktoken cache **Problem**: `PermissionError: [Errno 13] Permission denied` **Solution**: ```bash # Ensure cache directory has correct permissions chmod 755 ~/.tiktoken_cache chmod 644 ~/.tiktoken_cache/* # Or use a user-writable directory export TIKTOKEN_CACHE_DIR=~/my_tiktoken_cache mkdir -p ~/my_tiktoken_cache ``` ## Best Practices 
1. **Test in Online Environment First**: Always test your complete setup in an online environment before going offline. 2. **Keep Cache Updated**: Periodically update your offline cache when new models are released. 3. **Document Your Setup**: Keep notes on which optional dependencies you actually need. 4. **Version Pinning**: Consider pinning specific versions in production: ```bash pip freeze > requirements-production.txt ``` 5. **Minimal Installation**: Only install what you need: ```bash # If you only need API with document processing pip install lightrag-hku[api] # Then manually add specific LLM: pip install openai ``` ## Additional Resources - [LightRAG GitHub Repository](https://github.com/HKUDS/LightRAG) - [Docker Deployment Guide](./DockerDeployment.md) - [API Documentation](../lightrag/api/README.md) ## Support If you encounter issues not covered in this guide: 1. Check the [GitHub Issues](https://github.com/HKUDS/LightRAG/issues) 2. Review the [project documentation](../README.md) 3. Create a new issue with your offline deployment details ================================================ FILE: docs/UV_LOCK_GUIDE.md ================================================ # uv.lock Update Guide ## What is uv.lock? `uv.lock` is uv's lock file. It captures the exact version of every dependency, including transitive ones, much like: - Node.js `package-lock.json` - Rust `Cargo.lock` - Python Poetry `poetry.lock` Keeping `uv.lock` in version control guarantees that everyone installs the same dependency set. ## When does uv.lock change? ### Situations where it does *not* change automatically - Running `uv sync --frozen` - Building Docker images that call `uv sync --frozen` - Editing source code without touching dependency metadata ### Situations where it will change 1. 
**`uv lock` or `uv lock --upgrade`** ```bash uv lock # Resolve according to current constraints uv lock --upgrade # Re-resolve and upgrade to the newest compatible releases ``` Use these commands after modifying `pyproject.toml`, when you want fresh dependency versions, or if the lock file was deleted or corrupted. 2. **`uv add`** ```bash uv add requests # Adds the dependency and updates both files uv add --dev pytest # Adds a dev dependency ``` `uv add` edits `pyproject.toml` and refreshes `uv.lock` in one step. 3. **`uv remove`** ```bash uv remove requests ``` This removes the dependency from `pyproject.toml` and rewrites `uv.lock`. 4. **`uv sync` without `--frozen`** ```bash uv sync ``` Normally this only installs what is already locked. However, if `pyproject.toml` and `uv.lock` disagree or the lock file is missing, uv will regenerate and update `uv.lock`. In CI and production builds you should prefer `uv sync --frozen` to prevent unintended updates. ## Example workflows ### Scenario 1: Add a new dependency ```bash # Recommended: let uv handle both files uv add fastapi git add pyproject.toml uv.lock git commit -m "Add fastapi dependency" # Manual alternative # 1. Edit pyproject.toml # 2. Regenerate the lock file uv lock git add pyproject.toml uv.lock git commit -m "Add fastapi dependency" ``` ### Scenario 2: Relax or tighten a version constraint ```bash # 1. Edit the requirement in pyproject.toml, # e.g. openai>=1.0.0,<2.0.0 -> openai>=1.5.0,<2.0.0 # 2. Re-resolve the lock file uv lock # 3. 
Commit both files git add pyproject.toml uv.lock git commit -m "Update openai to >=1.5.0" ``` ### Scenario 3: Upgrade everything to the newest compatible versions ```bash uv lock --upgrade git diff uv.lock git add uv.lock git commit -m "Upgrade dependencies to latest compatible versions" ``` ### Scenario 4: Teammate syncing the project ```bash git pull # Fetch latest code and lock file uv sync --frozen # Install exactly what uv.lock specifies ``` ## Using uv.lock in Docker ```dockerfile RUN uv sync --frozen --no-dev --extra api ``` `--frozen` guarantees reproducible builds because uv will refuse to deviate from the locked versions. `--extra api` installs the API server dependencies. ## Generating a lock file that includes offline dependencies If you need `uv.lock` to capture the optional offline stacks, regenerate it with the relevant extras enabled: ```bash uv lock --extra api --extra offline ``` This command resolves the base project requirements plus both the `api` and `offline` optional dependency sets, ensuring downstream `uv sync --frozen --extra api --extra offline` installs work without further resolution. ## Frequently asked questions - **`uv.lock` is almost 1 MB. Does that matter?** No. The file is read only during dependency resolution. - **Should we commit `uv.lock`?** Yes. Commit it so collaborators and CI jobs share the same dependency graph. - **Deleted the lock file by accident?** Run `uv lock` to regenerate it from `pyproject.toml`. - **Can `uv.lock` and `requirements.txt` coexist?** They can, but maintaining both is redundant. Prefer relying on `uv.lock` alone whenever possible. - **How do I inspect locked versions?** ```bash uv tree grep -A5 'name = "openai"' uv.lock ``` ## Best practices ### Recommended 1. Commit `uv.lock` alongside `pyproject.toml`. 2. Use `uv sync --frozen` in CI, Docker, and other reproducible environments. 3. Use plain `uv sync` during local development if you want uv to reconcile the lock for you. 4.
Run `uv lock --upgrade` periodically to pick up the latest compatible releases. 5. Regenerate the lock file immediately after changing dependency constraints. ### Avoid 1. Running `uv sync` without `--frozen` in CI or production pipelines. 2. Editing `uv.lock` by hand—uv will overwrite manual edits. 3. Ignoring lock file diffs in code reviews—unexpected dependency changes can break builds. ## Summary | Command | Updates `uv.lock` | Typical use | |-----------------------|-------------------|-------------------------------------------| | `uv lock` | ✅ Yes | After editing constraints | | `uv lock --upgrade` | ✅ Yes | Upgrade to the newest compatible versions | | `uv add <package>` | ✅ Yes | Add a dependency | | `uv remove <package>` | ✅ Yes | Remove a dependency | | `uv sync` | ⚠️ Maybe | Local development; can regenerate the lock | | `uv sync --frozen` | ❌ No | CI/CD, Docker, reproducible builds | Remember: `uv.lock` only changes when you run a command that tells it to. Keep it in sync with your project and commit it whenever it changes. ================================================ FILE: env.example ================================================ ### All configurable environment variables must show up in this sample file, either active or commented out ### Setup tool `make env-*` uses this file to generate final .env file ### Lines starting with `# #` represent repeated environment variables; ### These are placeholders; the setup tool should not substitute actual values into these lines. ### Wizard metadata: describes which runtime this generated .env currently targets.
### Target environment of this env file: host/compose (compose is for Docker or Kubernetes) # LIGHTRAG_RUNTIME_TARGET=host ########################### ### Server Configuration ########################### HOST=0.0.0.0 PORT=9621 WEBUI_TITLE='My Graph KB' WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System" # WORKERS=2 ### gunicorn worker timeout(as default LLM request timeout if LLM_TIMEOUT is not set) # TIMEOUT=150 # CORS_ORIGINS=http://localhost:3000,http://localhost:8080 ### Optional SSL Configuration ### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container # SSL=true # SSL_CERTFILE=/path/to/cert.pem # SSL_KEYFILE=/path/to/key.pem ### Directory Configuration (defaults to current working directory) ### Default value is ./inputs and ./rag_storage # INPUT_DIR= # WORKING_DIR= ### Tiktoken cache directory (Store cached files in this folder for offline deployment) # TIKTOKEN_CACHE_DIR=/app/data/tiktoken ### Ollama Emulating Model and Tag # OLLAMA_EMULATING_MODEL_NAME=lightrag OLLAMA_EMULATING_MODEL_TAG=latest ### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value) # MAX_GRAPH_NODES=1000 ### Logging level # LOG_LEVEL=INFO # VERBOSE=False # LOG_MAX_BYTES=10485760 # LOG_BACKUP_COUNT=5 ### Logfile location (defaults to current working directory) # LOG_DIR=/path/to/log/directory ##################################### ### Login and API-Key Configuration ##################################### # AUTH_ACCOUNTS='admin:admin123,user1:pass456' # TOKEN_SECRET=Your-Key-For-LightRAG-API-Server # JWT_ALGORITHM=HS256 # TOKEN_EXPIRE_HOURS=48 # GUEST_TOKEN_EXPIRE_HOURS=24 ### Token Auto-Renewal Configuration (Sliding Window Expiration) ### Enable automatic token renewal to prevent active users from being logged out ### When enabled, tokens will be automatically renewed when remaining time < threshold # TOKEN_AUTO_RENEW=true ### Token renewal threshold (0.0 - 1.0) ### Renew token when
remaining time < (total time * threshold) ### Default: 0.5 (renew when 50% time remaining) ### Examples: ### 0.5 = renew when 24h token has 12h left ### 0.25 = renew when 24h token has 6h left # TOKEN_RENEW_THRESHOLD=0.5 ### Note: Token renewal is automatically skipped for certain endpoints: ### - /health: Health check endpoint (no authentication required) ### - /documents/paginated: Frequently polled by client (5-30s interval) ### - /documents/pipeline_status: Very frequently polled by client (2s interval) ### - Rate limit: Minimum 60 seconds between renewals for same user ### API-Key to access LightRAG Server API ### Use this key in HTTP requests with the 'X-API-Key' header ### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query # LIGHTRAG_API_KEY=your-secure-api-key-here # WHITELIST_PATHS=/health,/api/* ###################################################################################### ### Query Configuration ### ### How to control the context length sent to LLM: ### MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS ### Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens ###################################################################################### # LLM response cache for query (Not valid for streaming response) # ENABLE_LLM_CACHE=true # COSINE_THRESHOLD=0.2 ### Number of entities or relations retrieved from KG # TOP_K=40 ### Maximum number of chunks for naive vector search # CHUNK_TOP_K=20 ### control the actual entities sent to LLM # MAX_ENTITY_TOKENS=6000 ### control the actual relations sent to LLM # MAX_RELATION_TOKENS=8000 ### control the maximum tokens sent to LLM (including entities, relations and chunks) # MAX_TOTAL_TOKENS=30000 ### chunk selection strategies ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the
LLM ### If reranking is enabled, the impact of chunk selection strategies will be diminished. # KG_CHUNK_PICK_METHOD=VECTOR ### maximum number of related chunks per source entity or relation ### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph) ### Higher values increase re-ranking time # RELATED_CHUNK_NUMBER=5 ######################################################### ### Reranking configuration ### RERANK_BINDING type: null, cohere, jina, aliyun ### For rerank model deployed by vLLM use cohere binding ### If LightRAG deployed in Docker: ### uses host.docker.internal instead of localhost in RERANK_BINDING_HOST ######################################################### RERANK_BINDING=null # RERANK_MODEL=BAAI/bge-reranker-v2-m3 # RERANK_BINDING_HOST=http://localhost:8000/rerank # RERANK_BINDING_API_KEY=your_rerank_api_key_here ### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough) # MIN_RERANK_SCORE=0.0 ### Enable rerank by default in query params when RERANK_BINDING is not null # RERANK_BY_DEFAULT=True ### Cohere AI # # RERANK_MODEL=rerank-v3.5 # # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank # # RERANK_BINDING_API_KEY=your_rerank_api_key_here ### Cohere rerank chunking configuration (useful for models with token limits like ColBERT) # RERANK_ENABLE_CHUNKING=true # RERANK_MAX_TOKENS_PER_DOC=480 ### Aliyun Dashscope # # RERANK_MODEL=gte-rerank-v2 # # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank # # RERANK_BINDING_API_KEY=your_rerank_api_key_here ### Jina AI # # RERANK_MODEL=jina-reranker-v2-base-multilingual # # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank # # RERANK_BINDING_API_KEY=your_rerank_api_key_here ### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API) ### Wizard metadata used to preserve the chosen deployment provider across setup reruns # 
LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm # LIGHTRAG_SETUP_RERANK_PROVIDER=vllm # VLLM_EMBED_MODEL=BAAI/bge-m3 # VLLM_EMBED_PORT=8001 # VLLM_EMBED_DEVICE=cpu ### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank # VLLM_EMBED_API_KEY= # VLLM_EMBED_EXTRA_ARGS= # VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3 # VLLM_RERANK_PORT=8000 # VLLM_RERANK_DEVICE=cuda ### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank # VLLM_RERANK_API_KEY= ### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image. # VLLM_USE_CPU=1 ### Set to 1 for CPU mode, unset for GPU mode # CUDA_VISIBLE_DEVICES=-1 ### Set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode # NVIDIA_VISIBLE_DEVICES=0 ### Optional Docker runtime equivalent; generated GPU compose honors either variable. # VLLM_RERANK_EXTRA_ARGS= ######################################## ### Document processing configuration ######################################## ENABLE_LLM_CACHE_FOR_EXTRACT=true ### Document processing output language: English, Chinese, French, German ... 
SUMMARY_LANGUAGE=English ### File upload size limit (in bytes) ### Default: 104857600 (100MB) ### Set to 0 or None for unlimited upload size ### Examples: ### 52428800 = 50MB ### 104857600 = 100MB (default) ### 209715200 = 200MB ### Note: If using Nginx as reverse proxy, also configure client_max_body_size # MAX_UPLOAD_SIZE=104857600 ### Entity types that the LLM will attempt to recognize # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]' ### Chunk size for document splitting, 500~1500 is recommended # CHUNK_SIZE=1200 # CHUNK_OVERLAP_SIZE=100 ### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended) # FORCE_LLM_SUMMARY_ON_MERGE=8 ### Max description token size to trigger LLM summary # SUMMARY_MAX_TOKENS = 1200 ### Recommended LLM summary output length in tokens # SUMMARY_LENGTH_RECOMMENDED=600 ### Maximum context size sent to LLM for description summary # SUMMARY_CONTEXT_SIZE=12000 ### Maximum token size allowed for entity extraction input context # MAX_EXTRACT_INPUT_TOKENS=20480 ### control the maximum chunk_ids stored in vector and graph db # MAX_SOURCE_IDS_PER_ENTITY=300 # MAX_SOURCE_IDS_PER_RELATION=300 ### control chunk_ids limitation method: FIFO, KEEP ### FIFO: First in first out ### KEEP: Keep oldest (less merge action and faster) # SOURCE_IDS_LIMIT_METHOD=FIFO # Maximum number of file paths stored in entity/relation file_path field (For displayed only, does not affect query performance) # MAX_FILE_PATHS=100 ### PDF decryption password for protected PDF files # PDF_DECRYPT_PASSWORD=your_pdf_password_here ############################### ### Concurrency Configuration ############################### ### Max concurrency requests of LLM (for both query and document processing) MAX_ASYNC=4 ### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended) MAX_PARALLEL_INSERT=2 ### Max 
concurrency requests for Embedding # EMBEDDING_FUNC_MAX_ASYNC=8 ### Num of chunks sent to Embedding in a single request # EMBEDDING_BATCH_NUM=10 ########################################################################### ### LLM Configuration ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini ### LLM_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK) ### LLM_BINDING_API_KEY: api key ### If LightRAG deployed in Docker: ### uses host.docker.internal instead of localhost in LLM_BINDING_HOST ########################################################################### ### LLM request timeout setting for all llm (0 means no timeout for Ollama) # LLM_TIMEOUT=180 LLM_BINDING=openai LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key LLM_MODEL=gpt-5-mini ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter ### lightrag-server --llm-binding openai --help ### OpenAI Specific Parameters # OPENAI_LLM_REASONING_EFFORT=minimal ### OpenRouter Specific Parameters # OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}' ### Qwen3 Specific Parameters deploy by vLLM # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}' ### OpenAI Compatible API Specific Parameters ### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B. # OPENAI_LLM_TEMPERATURE=0.9 ### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e.
9000 = 180s * 50 tokens/s) ### Typically, max_tokens does not include prompt content ### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider # OPENAI_LLM_MAX_TOKENS=9000 ### For OpenAI o1-mini or newer models use max_completion_tokens instead of max_tokens # OPENAI_LLM_MAX_COMPLETION_TOKENS=9000 ### Azure OpenAI example ### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead # AZURE_OPENAI_API_VERSION=2024-08-01-preview # # LLM_BINDING=azure_openai # # LLM_BINDING_HOST=https://xxxx.openai.azure.com/ # # LLM_BINDING_API_KEY=your_api_key # # LLM_MODEL=my-gpt-mini-deployment ### Openrouter example # # LLM_BINDING=openai # # LLM_BINDING_HOST=https://openrouter.ai/api/v1 # # LLM_BINDING_API_KEY=your_api_key # # LLM_MODEL=google/gemini-2.5-flash ### Google Gemini example (AI Studio) # # LLM_BINDING=gemini # # LLM_BINDING_API_KEY=your_gemini_api_key # # LLM_BINDING_HOST=https://generativelanguage.googleapis.com # # LLM_MODEL=gemini-flash-latest ### use the following command to see all support options for Gemini ### lightrag-server --llm-binding gemini --help ### Gemini Specific Parameters # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000 # GEMINI_LLM_TEMPERATURE=0.7 ### Enable or disable thinking # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}' # # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}' ### Google Vertex AI example ### Vertex AI use GOOGLE_APPLICATION_CREDENTIALS instead of API-KEY for authentication ### LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT means select endpoint based on project and location automatically # # LLM_BINDING=gemini # # LLM_BINDING_HOST=https://aiplatform.googleapis.com ### or use DEFAULT_GEMINI_ENDPOINT to select endpoint based on project and location automatically # # LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT # # LLM_MODEL=gemini-2.5-flash # GOOGLE_GENAI_USE_VERTEXAI=true # GOOGLE_CLOUD_PROJECT='your-project-id' #
GOOGLE_CLOUD_LOCATION='us-central1' # GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json' ### Ollama example # # LLM_BINDING=ollama # # LLM_BINDING_HOST=http://localhost:11434 # # LLM_MODEL=qwen3.5:9b ### use the following command to see all support options for Ollama LLM ### lightrag-server --llm-binding ollama --help ### Ollama Server Specific Parameters ### OLLAMA_LLM_NUM_CTX must be provided, and should be larger than MAX_TOTAL_TOKENS + 2000 OLLAMA_LLM_NUM_CTX=32768 ### Set the max_output_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s) # OLLAMA_LLM_NUM_PREDICT=9000 # OLLAMA_LLM_TEMPERATURE=0.85 ### Stop sequences for Ollama LLM # OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]' ### Bedrock Specific Parameters ### Bedrock uses AWS credentials from the environment / AWS credential chain. ### It does not use LLM_BINDING_API_KEY. # # LLM_BINDING=aws_bedrock # # LLM_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0 # AWS_ACCESS_KEY_ID=your_aws_access_key_id # AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key # AWS_SESSION_TOKEN=your_optional_aws_session_token # AWS_REGION=us-east-1 # BEDROCK_LLM_TEMPERATURE=1.0 ####################################################################################### ### Embedding Configuration (Should not be changed after the first file processed) ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock ### EMBEDDING_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK) ### EMBEDDING_BINDING_API_KEY: api key ### If LightRAG deployed in Docker: ### uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST ### Control whether to send embedding_dim parameter to embedding API ### For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment ### For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
### For Gemini: Always set EMBEDDING_SEND_DIM=true ####################################################################################### # EMBEDDING_TIMEOUT=30 ### OpenAI compatible embedding EMBEDDING_BINDING=openai EMBEDDING_BINDING_HOST=https://api.openai.com/v1 EMBEDDING_BINDING_API_KEY=your_api_key EMBEDDING_MODEL=text-embedding-3-large EMBEDDING_DIM=3072 EMBEDDING_TOKEN_LIMIT=8192 EMBEDDING_SEND_DIM=false ### Optional for Azure Embedding ### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead # # EMBEDDING_BINDING=azure_openai # # EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/ # # EMBEDDING_BINDING_API_KEY=your_api_key # # EMBEDDING_MODEL=my-text-embedding-3-large-deployment # # EMBEDDING_DIM=3072 # AZURE_EMBEDDING_API_VERSION=2024-08-01-preview ### Gemini embedding # # EMBEDDING_BINDING=gemini # # EMBEDDING_MODEL=gemini-embedding-001 # # EMBEDDING_DIM=1536 # # EMBEDDING_TOKEN_LIMIT=2048 # # EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com # # EMBEDDING_BINDING_API_KEY=your_api_key ### Gemini embedding requires sending dimension to server # # EMBEDDING_SEND_DIM=true ### Ollama embedding # # EMBEDDING_BINDING=ollama # # EMBEDDING_BINDING_HOST=http://localhost:11434 # # EMBEDDING_BINDING_API_KEY=your_api_key # # EMBEDDING_MODEL=qwen3-embedding:4b # # EMBEDDING_DIM=2560 ### Optional for Ollama embedding OLLAMA_EMBEDDING_NUM_CTX=8192 ### use the following command to see all support options for Ollama embedding ### lightrag-server --embedding-binding ollama --help ### Bedrock embedding ### Bedrock uses AWS credentials from the environment / AWS credential chain. ### It does not use EMBEDDING_BINDING_API_KEY.
# # EMBEDDING_BINDING=aws_bedrock # # EMBEDDING_MODEL=amazon.titan-embed-text-v2:0 # # EMBEDDING_DIM=1024 # AWS_ACCESS_KEY_ID=your_aws_access_key_id # AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key # AWS_SESSION_TOKEN=your_optional_aws_session_token # AWS_REGION=us-east-1 ### Jina AI Embedding # # EMBEDDING_BINDING=jina # # EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings # # EMBEDDING_MODEL=jina-embeddings-v4 # # EMBEDDING_DIM=2048 # # EMBEDDING_BINDING_API_KEY=your_api_key #################################################################### ### WORKSPACE sets workspace name for all storage types ### for the purpose of isolating data from LightRAG instances. ### Valid workspace name constraints: a-z, A-Z, 0-9, and _ #################################################################### # WORKSPACE= ############################ ### Data storage selection ############################ ### Default storage (Recommended for small scale deployment) # LIGHTRAG_KV_STORAGE=JsonKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage ### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns # LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker # LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker # LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable # LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker # LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker # LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker # LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker # LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker ### Redis Storage (Recommended for production deployment) # # LIGHTRAG_KV_STORAGE=RedisKVStorage # # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage ### Vector Storage (Recommended for production deployment) # # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage # # LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage # # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage ### Graph 
Storage (Recommended for production deployment) # # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage # # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage ### Select OpenSearch for all storages # # LIGHTRAG_KV_STORAGE=OpenSearchKVStorage # # LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage # # LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage # # LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage ### Select PostgreSQL for all storages # # LIGHTRAG_KV_STORAGE=PGKVStorage # # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage # # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage # # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage ### Select MongoDB for all storage (Vector storage requires an Atlas-capable deployment) # # LIGHTRAG_KV_STORAGE=MongoKVStorage # # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage # # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage # # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage ### PostgreSQL Configuration POSTGRES_HOST=localhost POSTGRES_PORT=5432 POSTGRES_USER=your_username POSTGRES_PASSWORD='your_password' POSTGRES_DATABASE=rag POSTGRES_MAX_CONNECTIONS=12 ### DB specific workspace should not be set, keep for compatible only # POSTGRES_WORKSPACE=forced_workspace_name ### PostgreSQL Vector Storage Configuration ### Enable/disable vector features (default: true for backward compatibility) ### Set to false to disable pgvector extension and vector operations when using PostgreSQL ### only for KV/Graph/DocStatus storage with a different vector backend (e.g., Milvus, Qdrant) POSTGRES_ENABLE_VECTOR=true ### Vector storage type: HNSW, IVFFlat, VCHORDRQ POSTGRES_VECTOR_INDEX_TYPE=HNSW POSTGRES_HNSW_M=16 POSTGRES_HNSW_EF=200 POSTGRES_IVFFLAT_LISTS=100 POSTGRES_VCHORDRQ_BUILD_OPTIONS= POSTGRES_VCHORDRQ_PROBES= POSTGRES_VCHORDRQ_EPSILON=1.9 ### PostgreSQL Connection Retry Configuration (Network Robustness) ### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time ### These defaults provide out-of-the-box support for PostgreSQL High Availability setups ### ### Number of retry 
attempts (1-100, default: 10) ### - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios) ### - For extreme cases: increase up to 20-50 ### Initial retry backoff in seconds (0.1-300.0, default: 3.0) ### - Default 3.0s provides reasonable initial delay for switchover detection ### - For faster recovery: decrease to 1.0-2.0 ### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0) ### - Default 30.0s matches typical switchover completion time ### - For longer switchovers: increase to 60-90 ### Connection pool close timeout in seconds (1.0-30.0, default: 5.0) # POSTGRES_CONNECTION_RETRIES=10 # POSTGRES_CONNECTION_RETRY_BACKOFF=3.0 # POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0 # POSTGRES_POOL_CLOSE_TIMEOUT=5.0 ### PostgreSQL SSL Configuration (Optional) # POSTGRES_SSL_MODE=require # POSTGRES_SSL_CERT=/path/to/client-cert.pem # POSTGRES_SSL_KEY=/path/to/client-key.pem # POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem # POSTGRES_SSL_CRL=/path/to/crl.pem ### PostgreSQL Server Settings (for Supabase Supavisor) # Use this to pass extra options to the PostgreSQL connection string. # For Supabase, you might need to set it like this: # POSTGRES_SERVER_SETTINGS="options=reference%3D[project-ref]" # Default is 100 set to 0 to disable # POSTGRES_STATEMENT_CACHE_SIZE=100 ### Neo4j Configuration NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io NEO4J_USERNAME=neo4j NEO4J_PASSWORD='your_password' NEO4J_DATABASE=neo4j NEO4J_MAX_CONNECTION_POOL_SIZE=100 NEO4J_CONNECTION_TIMEOUT=30 NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30 NEO4J_MAX_TRANSACTION_RETRY_TIME=30 NEO4J_MAX_CONNECTION_LIFETIME=300 NEO4J_LIVENESS_CHECK_TIMEOUT=30 NEO4J_KEEP_ALIVE=true ### DB specific workspace should not be set, keep for compatible only # NEO4J_WORKSPACE=forced_workspace_name ### MongoDB Configuration # For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with # Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local. 
MONGO_URI=mongodb://localhost:27017/ MONGO_DATABASE=LightRAG ### DB specific workspace should not be set, keep for compatible only # MONGODB_WORKSPACE=forced_workspace_name # Community/local Docker MongoDB example for KV, graph, or doc-status storage only: # MONGO_URI=mongodb://localhost:27017/ ### OpenSearch Configuration ### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus # # LIGHTRAG_KV_STORAGE=OpenSearchKVStorage # # LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage # # LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage # # LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage ### Connection settings (comma-separated host:port entries; do not include http:// or https://) ### This setup wizard supports authenticated OpenSearch clusters only. ### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS. OPENSEARCH_HOSTS=localhost:9200 OPENSEARCH_USER=admin OPENSEARCH_PASSWORD=LightRAG2026_!@ OPENSEARCH_USE_SSL=true OPENSEARCH_VERIFY_CERTS=false # OPENSEARCH_TIMEOUT=30 # OPENSEARCH_MAX_RETRIES=3 ### k-NN Settings for Vector Storage (HNSW algorithm) # OPENSEARCH_KNN_EF_CONSTRUCTION=200 # OPENSEARCH_KNN_M=16 # OPENSEARCH_KNN_EF_SEARCH=100 ### PPL graphlookup for server-side graph traversal (auto-detected if not set) # OPENSEARCH_USE_PPL_GRAPHLOOKUP=true ### DB specific workspace should not be set, keep for compatible only # OPENSEARCH_WORKSPACE=forced_workspace_name ### Milvus Configuration MILVUS_URI=http://localhost:19530 MILVUS_DB_NAME=lightrag # MILVUS_DEVICE=cpu # MILVUS_USER=root # MILVUS_PASSWORD=your_password # MILVUS_TOKEN=your_token # Required for the bundled Docker Milvus stack; may come from .env or exported shell variables. 
# MINIO_ACCESS_KEY_ID=minioadmin # MINIO_SECRET_ACCESS_KEY=minioadmin ### DB specific workspace should not be set, keep for compatible only # MILVUS_WORKSPACE=forced_workspace_name ### Milvus Vector Index Configuration ### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN # MILVUS_INDEX_TYPE=AUTOINDEX ### Metric type: COSINE (default), L2, IP # MILVUS_METRIC_TYPE=COSINE ### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults) ### M: Maximum number of connections per node [2-2048], default 16 # MILVUS_HNSW_M=16 ### efConstruction: Size of dynamic candidate list during build [8-512], default 360 # MILVUS_HNSW_EF_CONSTRUCTION=360 ### ef: Size of dynamic candidate list during search, default 200 # MILVUS_HNSW_EF=200 ### HNSW_SQ Specific Parameters (requires Milvus 2.6.8+) ### sq_type: Scalar quantization type - SQ4U, SQ6, SQ8 (default), BF16, FP16 # MILVUS_HNSW_SQ_TYPE=SQ8 ### refine: Enable refinement step for higher precision, default false # MILVUS_HNSW_SQ_REFINE=false ### refine_type: Refinement precision (must be higher than sq_type) - SQ6, SQ8, BF16, FP16, FP32 # MILVUS_HNSW_SQ_REFINE_TYPE=FP32 ### refine_k: Refinement expansion factor, default 10 # MILVUS_HNSW_SQ_REFINE_K=10 ### IVF_FLAT / IVF_SQ8 Parameters ### nlist: Number of cluster units [1-65536], recommended sqrt(n) for n>1M, default 1024 # MILVUS_IVF_NLIST=1024 ### nprobe: Number of units to query [1-nlist], default 16 # MILVUS_IVF_NPROBE=16 ### Qdrant QDRANT_URL=http://localhost:6333 # QDRANT_DEVICE=cpu # QDRANT_API_KEY=your-api-key ### Qdrant upsert batching (enabled by default) ### Split large upserts by estimated JSON payload size and point count ### Default 16MB keeps safe headroom below common 32MB gateway/request limits # QDRANT_UPSERT_MAX_PAYLOAD_BYTES=16777216 # QDRANT_UPSERT_MAX_POINTS_PER_BATCH=128 ### DB specific workspace should not be set, keep for compatible only # QDRANT_WORKSPACE=forced_workspace_name ### Redis 
REDIS_URI=redis://localhost:6379 REDIS_SOCKET_TIMEOUT=30 REDIS_CONNECT_TIMEOUT=10 REDIS_MAX_CONNECTIONS=100 REDIS_RETRY_ATTEMPTS=3 ### DB specific workspace should not be set, keep for compatible only # REDIS_WORKSPACE=forced_workspace_name ### Memgraph Configuration MEMGRAPH_URI=bolt://localhost:7687 MEMGRAPH_USERNAME= MEMGRAPH_PASSWORD= MEMGRAPH_DATABASE=memgraph ### DB specific workspace should not be set, keep for compatible only # MEMGRAPH_WORKSPACE=forced_workspace_name ########################################################### ### Langfuse Observability Configuration ### Only works with LLM provided by OpenAI compatible API ### Install with: pip install lightrag-hku[observability] ### Sign up at: https://cloud.langfuse.com or self-host ########################################################### # LANGFUSE_SECRET_KEY="" # LANGFUSE_PUBLIC_KEY="" # LANGFUSE_HOST="https://cloud.langfuse.com" # LANGFUSE_ENABLE_TRACE=true ############################ ### Evaluation Configuration ############################ ### RAGAS evaluation models (used for RAG quality assessment) ### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible ### Default uses OpenAI models for evaluation ### LLM Configuration for Evaluation # EVAL_LLM_MODEL=gpt-4o-mini ### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set) # EVAL_LLM_BINDING_API_KEY=your_api_key ### Custom OpenAI-compatible endpoint for LLM evaluation (optional) # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1 ### Embedding Configuration for Evaluation # EVAL_EMBEDDING_MODEL=text-embedding-3-large ### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY) # EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key ### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST) # EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1 ### Performance Tuning ### Number of concurrent test case evaluations ### Lower values reduce API rate limit issues but 
def openai_complete_if_cache(
    model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=None, **kwargs
) -> str:
    """Send one chat-completion request through the OpenAI client and return the reply text.

    Despite the name, nothing is cached here: every call constructs a new
    ``OpenAI`` client and issues a fresh request.

    Args:
        model: Chat model name passed to the API.
        prompt: Content of the user message, appended last.
        system_prompt: Optional system message; added first when truthy.
        history_messages: Optional earlier messages (role/content dicts)
            placed between the system and user messages. ``None`` means
            "no history".
        **kwargs: Forwarded unchanged to ``chat.completions.create``.

    Returns:
        ``response.choices[0].message.content`` of the completion.
    """
    openai_client = OpenAI()

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    # ``None`` replaces the former mutable ``[]`` default; the caller's list
    # is only read, never modified.
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})

    response = openai_client.chat.completions.create(
        model=model, messages=messages, **kwargs
    )
    return response.choices[0].message.content
""" result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt) file_path = "./queries.txt" with open(file_path, "w") as file: file.write(result) print(f"Queries written to {file_path}") ================================================ FILE: examples/graph_visual_with_html.py ================================================ import pipmaster as pm if not pm.is_installed("pyvis"): pm.install("pyvis") if not pm.is_installed("networkx"): pm.install("networkx") import networkx as nx from pyvis.network import Network import random # Load the GraphML file G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml") # Create a Pyvis network net = Network(height="100vh", notebook=True) # Convert NetworkX graph to Pyvis network net.from_nx(G) # Add colors and title to nodes for node in net.nodes: node["color"] = "#{:06x}".format(random.randint(0, 0xFFFFFF)) if "description" in node: node["title"] = node["description"] # Add title to edges for edge in net.edges: if "description" in edge: edge["title"] = edge["description"] # Save and display the network net.show("knowledge_graph.html") ================================================ FILE: examples/graph_visual_with_neo4j.py ================================================ import os import json import xml.etree.ElementTree as ET from neo4j import GraphDatabase # Constants WORKING_DIR = "./dickens" BATCH_SIZE_NODES = 500 BATCH_SIZE_EDGES = 100 # Neo4j connection credentials NEO4J_URI = "bolt://localhost:7687" NEO4J_USERNAME = "neo4j" NEO4J_PASSWORD = "your_password" def xml_to_json(xml_file): try: tree = ET.parse(xml_file) root = tree.getroot() # Print the root element's tag and attributes to confirm the file has been correctly loaded print(f"Root element: {root.tag}") print(f"Root attributes: {root.attrib}") data = {"nodes": [], "edges": []} # Use namespace namespace = {"": "http://graphml.graphdrawing.org/xmlns"} for node in root.findall(".//node", namespace): node_data = { "id": node.get("id").strip('"'), 
"entity_type": node.find("./data[@key='d1']", namespace).text.strip('"') if node.find("./data[@key='d1']", namespace) is not None else "", "description": node.find("./data[@key='d2']", namespace).text if node.find("./data[@key='d2']", namespace) is not None else "", "source_id": node.find("./data[@key='d3']", namespace).text if node.find("./data[@key='d3']", namespace) is not None else "", } data["nodes"].append(node_data) for edge in root.findall(".//edge", namespace): edge_data = { "source": edge.get("source").strip('"'), "target": edge.get("target").strip('"'), "weight": float(edge.find("./data[@key='d5']", namespace).text) if edge.find("./data[@key='d5']", namespace) is not None else 0.0, "description": edge.find("./data[@key='d6']", namespace).text if edge.find("./data[@key='d6']", namespace) is not None else "", "keywords": edge.find("./data[@key='d9']", namespace).text if edge.find("./data[@key='d9']", namespace) is not None else "", "source_id": edge.find("./data[@key='d8']", namespace).text if edge.find("./data[@key='d8']", namespace) is not None else "", } data["edges"].append(edge_data) # Print the number of nodes and edges found print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges") return data except ET.ParseError as e: print(f"Error parsing XML file: {e}") return None except Exception as e: print(f"An error occurred: {e}") return None def convert_xml_to_json(xml_path, output_path): """Converts XML file to JSON and saves the output.""" if not os.path.exists(xml_path): print(f"Error: File not found - {xml_path}") return None json_data = xml_to_json(xml_path) if json_data: with open(output_path, "w", encoding="utf-8") as f: json.dump(json_data, f, ensure_ascii=False, indent=2) print(f"JSON file created: {output_path}") return json_data else: print("Failed to create JSON data") return None def process_in_batches(tx, query, data, batch_size): """Process data in batches and execute the given query.""" for i in range(0, len(data), 
def main():
    """Convert the GraphML export to JSON and load its nodes and edges into Neo4j.

    Steps, in order:
      1. Convert ``graph_chunk_entity_relation.graphml`` under ``WORKING_DIR``
         to ``graph_data.json`` via ``convert_xml_to_json``; stop silently if
         that returns ``None``.
      2. Insert nodes, then edges, in batches through ``process_in_batches``.
      3. Run a final query that sets ``displayName`` and replaces each node's
         labels with its ``entity_type``.

    The Cypher statements call ``apoc.create.*`` procedures.
    NOTE(review): this presumably requires the APOC plugin on the target
    server - confirm before running.
    Any exception raised while talking to Neo4j is printed, not re-raised;
    the driver is always closed.
    """
    # Paths
    xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")
    json_file = os.path.join(WORKING_DIR, "graph_data.json")

    # Convert XML to JSON
    json_data = convert_xml_to_json(xml_file, json_file)
    if json_data is None:
        return

    # Load nodes and edges
    nodes = json_data.get("nodes", [])
    edges = json_data.get("edges", [])

    # Neo4j queries
    # Upsert each node by id, copy its properties, then swap the generic
    # Entity label for a label equal to the node id.
    create_nodes_query = """ UNWIND $nodes AS node MERGE (e:Entity {id: node.id}) SET e.entity_type = node.entity_type, e.description = node.description, e.source_id = node.source_id, e.displayName = node.id REMOVE e:Entity WITH e, node CALL apoc.create.addLabels(e, [node.id]) YIELD node AS labeledNode RETURN count(*) """

    # Relationship type is derived from the edge keywords: one of a few known
    # verbs if present, otherwise the first comma-separated keyword with
    # double quotes removed.
    create_edges_query = """ UNWIND $edges AS edge MATCH (source {id: edge.source}) MATCH (target {id: edge.target}) WITH source, target, edge, CASE WHEN edge.keywords CONTAINS 'lead' THEN 'lead' WHEN edge.keywords CONTAINS 'participate' THEN 'participate' WHEN edge.keywords CONTAINS 'uses' THEN 'uses' WHEN edge.keywords CONTAINS 'located' THEN 'located' WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs' ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '') END AS relType CALL apoc.create.relationship(source, relType, { weight: edge.weight, description: edge.description, keywords: edge.keywords, source_id: edge.source_id }, target) YIELD rel RETURN count(*) """

    # Final pass over every node: displayName = id, labels = [entity_type].
    set_displayname_and_labels_query = """ MATCH (n) SET n.displayName = n.id WITH n CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node RETURN count(*) """

    # Create a Neo4j driver
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

    try:
        # Execute queries in batches
        with driver.session() as session:
            # Insert nodes in batches
            session.execute_write(
                process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES
            )

            # Insert edges in batches
            session.execute_write(
                process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES
            )

            # Set displayName and labels
            session.run(set_displayname_and_labels_query)

    except Exception as e:
        # Best-effort script: report the failure and fall through to cleanup
        print(f"Error occurred: {e}")

    finally:
        driver.close()
def generate_html(graph_data: dict, output_file: str) -> str:
    """Render graph data as a standalone interactive HTML page using Pyvis.

    Args:
        graph_data: Mapping with optional ``"nodes"`` and ``"edges"`` lists.
            Each node may carry ``id`` and ``properties`` (``entity_type``,
            ``description``); each edge may carry ``source``, ``target``,
            ``type`` and ``properties`` (``description``).
        output_file: Path of the HTML file to write.

    Returns:
        ``output_file``, after the graph has been saved.

    Exits the process with status 1 (after printing a hint) when there are
    no nodes.
    """
    import hashlib

    def _tooltip(tag, description):
        # "[tag] first 200 chars of description", or just the tag when there is no description
        return f"[{tag}] {description[:200]}" if description else tag

    nodes = graph_data.get("nodes", [])
    edges = graph_data.get("edges", [])

    if not nodes:
        print("No nodes found in the graph. Index some documents first.")
        sys.exit(1)

    print(f"Building visualization: {len(nodes)} nodes, {len(edges)} edges")

    net = Network(height="100vh", notebook=False, cdn_resources="in_line")

    # Nodes: colour is derived deterministically from the entity type
    for node in nodes:
        node_id = node.get("id", "")
        attrs = node.get("properties", {})
        entity_type = attrs.get("entity_type", "unknown")
        summary = attrs.get("description", "")

        # The first six hex digits of the MD5 digest are used directly as RRGGBB
        digest = hashlib.md5(entity_type.encode()).hexdigest()

        net.add_node(
            node_id,
            label=node_id,
            title=_tooltip(entity_type, summary),
            color="#" + digest[:6],
        )

    # Edges: labelled with the relation type
    for edge in edges:
        start = edge.get("source", "")
        end = edge.get("target", "")
        attrs = edge.get("properties", {})
        rel_type = edge.get("type", "")
        summary = attrs.get("description", "")

        net.add_edge(
            start,
            end,
            title=_tooltip(rel_type, summary),
            label=rel_type,
        )

    net.save_graph(output_file)
    print(f"Graph saved to {output_file}")
    return output_file
import os
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete

#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

# Directory where this example keeps its LightRAG state
WORKING_DIR = "./custom_kg"

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
    # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
)

# Hand-written knowledge graph: entities, the relationships between them and
# the text chunks they were taken from. ``source_id`` ties the three together.
custom_kg = {
    "entities": [
        {
            "entity_name": "CompanyA",
            "entity_type": "Organization",
            "description": "A major technology company",
            "source_id": "Source1",
        },
        {
            "entity_name": "ProductX",
            "entity_type": "Product",
            "description": "A popular product developed by CompanyA",
            "source_id": "Source1",
        },
        {
            "entity_name": "PersonA",
            "entity_type": "Person",
            "description": "A renowned researcher in AI",
            "source_id": "Source2",
        },
        {
            "entity_name": "UniversityB",
            "entity_type": "Organization",
            "description": "A leading university specializing in technology and sciences",
            "source_id": "Source2",
        },
        {
            "entity_name": "CityC",
            "entity_type": "Location",
            "description": "A large metropolitan city known for its culture and economy",
            "source_id": "Source3",
        },
        {
            "entity_name": "EventY",
            "entity_type": "Event",
            "description": "An annual technology conference held in CityC",
            "source_id": "Source3",
        },
    ],
    "relationships": [
        {
            "src_id": "CompanyA",
            "tgt_id": "ProductX",
            "description": "CompanyA develops ProductX",
            "keywords": "develop, produce",
            "weight": 1.0,
            "source_id": "Source1",
        },
        {
            "src_id": "PersonA",
            "tgt_id": "UniversityB",
            "description": "PersonA works at UniversityB",
            "keywords": "employment, affiliation",
            "weight": 0.9,
            "source_id": "Source2",
        },
        {
            "src_id": "CityC",
            "tgt_id": "EventY",
            "description": "EventY is hosted in CityC",
            "keywords": "host, location",
            "weight": 0.8,
            "source_id": "Source3",
        },
    ],
    # Every chunk now uses the same ordering key, ``chunk_order_index``.
    # Four chunks were previously spelled ``source_chunk_index`` while the
    # second Source1 chunk already used ``chunk_order_index``.
    # NOTE(review): presumably ``chunk_order_index`` is the key the library
    # reads - confirm against insert_custom_kg.
    "chunks": [
        {
            "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
            "source_id": "Source1",
            "chunk_order_index": 0,
        },
        {
            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
            "source_id": "Source1",
            "chunk_order_index": 1,
        },
        {
            "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
            "source_id": "Source2",
            "chunk_order_index": 0,
        },
        {
            "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
            "source_id": "Source3",
            "chunk_order_index": 0,
        },
        {
            "content": "None",
            "source_id": "UNKNOWN",
            "chunk_order_index": 0,
        },
    ],
}

rag.insert_custom_kg(custom_kg)
def main():
    """Index two book files and print one answer per retrieval mode.

    Builds the RAG instance with ``initialize_rag``, inserts the contents of
    ``./book_1.txt`` and ``./book_2.txt`` in a single call, then runs the same
    question in naive, local, global and hybrid mode, printing each result
    under its heading.

    Both files are read inside ``with`` blocks so their handles are closed
    before indexing starts; previously they were opened and never closed.
    """
    rag = asyncio.run(initialize_rag())

    documents = []
    for book_path in ("./book_1.txt", "./book_2.txt"):
        with open(book_path, encoding="utf-8") as book:
            documents.append(book.read())
    rag.insert(documents)

    query_text = "What are the main themes?"

    # Only the first heading has no leading blank line
    sections = (
        ("Result (Naive):", "naive"),
        ("\nResult (Local):", "local"),
        ("\nResult (Global):", "global"),
        ("\nResult (Hybrid):", "hybrid"),
    )
    for heading, mode in sections:
        print(heading)
        print(rag.query(query_text, param=QueryParam(mode=mode)))
" "Please set it with: export GEMINI_API_KEY='your-api-key'" ) if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) # -------------------------------------------------- # LLM function # -------------------------------------------------- async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): return await gemini_model_complete( prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=GEMINI_API_KEY, model_name="gemini-2.0-flash", **kwargs, ) # -------------------------------------------------- # Embedding function # -------------------------------------------------- @wrap_embedding_func_with_attrs( embedding_dim=768, send_dimensions=True, max_token_size=2048, model_name="models/text-embedding-004", ) async def embedding_func(texts: list[str]) -> np.ndarray: return await gemini_embed.func( texts, api_key=GEMINI_API_KEY, model="models/text-embedding-004" ) # -------------------------------------------------- # Initialize RAG # -------------------------------------------------- async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=embedding_func, llm_model_name="gemini-2.0-flash", ) # 🔑 REQUIRED await rag.initialize_storages() return rag # -------------------------------------------------- # Main # -------------------------------------------------- def main(): # Validate book file exists if not os.path.exists(BOOK_FILE): raise FileNotFoundError( f"'{BOOK_FILE}' not found. " "Please provide a text file to index in the current directory." ) rag = asyncio.run(initialize_rag()) # Insert text with open(BOOK_FILE, "r", encoding="utf-8") as f: rag.insert(f.read()) query = "What are the top themes?" 
print("\nNaive Search:") print(rag.query(query, param=QueryParam(mode="naive"))) print("\nLocal Search:") print(rag.query(query, param=QueryParam(mode="local"))) print("\nGlobal Search:") print(rag.query(query, param=QueryParam(mode="global"))) print("\nHybrid Search:") print(rag.query(query, param=QueryParam(mode="hybrid"))) if __name__ == "__main__": main() ================================================ FILE: examples/lightrag_gemini_postgres_demo.py ================================================ """ LightRAG Demo with PostgreSQL + Google Gemini This example demonstrates how to use LightRAG with: - Google Gemini (LLM + Embeddings) - PostgreSQL-backed storages for: - Vector storage - Graph storage - KV storage - Document status storage Prerequisites: 1. PostgreSQL database running and accessible 2. Required tables will be auto-created by LightRAG 3. Set environment variables (example .env): POSTGRES_HOST=localhost POSTGRES_PORT=5432 POSTGRES_USER=admin POSTGRES_PASSWORD=admin POSTGRES_DATABASE=ai LIGHTRAG_KV_STORAGE=PGKVStorage LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage LIGHTRAG_GRAPH_STORAGE=PGGraphStorage LIGHTRAG_VECTOR_STORAGE=PGVectorStorage GEMINI_API_KEY=your-api-key 4. 
async def llm_model_func(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Send a completion request to Gemini on behalf of LightRAG.

    Args:
        prompt: User prompt text.
        system_prompt: Optional system instruction.
        history_messages: Optional prior conversation turns. ``None`` means
            "no history".
        keyword_extraction: Accepted so callers may pass it; this wrapper does
            not forward it to ``gemini_model_complete``.
        **kwargs: Forwarded unchanged to ``gemini_model_complete``.

    Returns:
        The value returned by ``gemini_model_complete``.
    """
    return await gemini_model_complete(
        prompt,
        system_prompt=system_prompt,
        # Build a fresh list per call instead of sharing one default object.
        history_messages=history_messages if history_messages is not None else [],
        api_key=GEMINI_API_KEY,
        model_name="gemini-2.0-flash",
        **kwargs,
    )


# --------------------------------------------------
# Embedding function (Gemini)
# --------------------------------------------------
@wrap_embedding_func_with_attrs(
    embedding_dim=768,
    max_token_size=2048,
    model_name="models/text-embedding-004",
)
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the Gemini embedding model.

    ``gemini_embed.func`` is called (rather than ``gemini_embed``) and this
    function is given its attributes by the decorator above.
    """
    return await gemini_embed.func(
        texts,
        api_key=GEMINI_API_KEY,
        model="models/text-embedding-004",
    )


# --------------------------------------------------
# Initialize RAG with PostgreSQL storages
# --------------------------------------------------
async def initialize_rag() -> LightRAG:
    """Create a LightRAG instance that uses the four ``PG*`` storage classes.

    Returns:
        The instance, after ``initialize_storages()`` has completed.
    """
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_name="gemini-2.0-flash",
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
        # Performance tuning
        embedding_func_max_async=4,
        embedding_batch_num=8,
        llm_model_max_async=2,
        # Chunking
        chunk_token_size=1200,
        chunk_overlap_token_size=100,
        # PostgreSQL-backed storages
        graph_storage="PGGraphStorage",
        vector_storage="PGVectorStorage",
        doc_status_storage="PGDocStatusStorage",
        kv_storage="PGKVStorage",
    )

    # REQUIRED: initialize all storage backends
    await rag.initialize_storages()
    return rag


# --------------------------------------------------
# Main
# --------------------------------------------------
async def main():
    """Index ``BOOK_FILE`` and run one sample query per retrieval mode.

    Any exception is reported with a traceback instead of being re-raised, and
    storages are finalized whenever initialization succeeded.
    """
    rag = None
    try:
        # Validate the input first so a missing file does not cost a round
        # of storage initialization.
        if not os.path.exists(BOOK_FILE):
            raise FileNotFoundError(
                f"'{BOOK_FILE}' not found. Please provide a text file to index."
            )

        print("Initializing LightRAG with PostgreSQL + Gemini...")
        rag = await initialize_rag()

        print(f"\nReading document: {BOOK_FILE}")
        with open(BOOK_FILE, "r", encoding="utf-8") as f:
            content = f.read()
        print(f"Loaded document ({len(content)} characters)")

        print("\nInserting document into LightRAG (this may take some time)...")
        await rag.ainsert(content)
        print("Document indexed successfully!")

        print("\n" + "=" * 60)
        print("Running sample queries")
        print("=" * 60)

        query = "What are the top themes in this document?"
        for mode in ["naive", "local", "global", "hybrid"]:
            print(f"\n[{mode.upper()} MODE]")
            result = await rag.aquery(query, param=QueryParam(mode=mode))
            # Show at most a 400-character preview of each answer.
            print((result[:400] + "...") if len(result) > 400 else result)

        print("\nRAG system is ready for use!")
    except Exception as e:
        print("An error occurred:", e)
        import traceback

        traceback.print_exc()
    finally:
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    asyncio.run(main())
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Wrapper for Gemini LLM completion.

    Args:
        prompt: User prompt text.
        system_prompt: Optional system instruction.
        history_messages: Optional prior conversation turns. ``None`` means
            "no history".
        keyword_extraction: Accepted so callers may pass it; not forwarded.
        **kwargs: Forwarded unchanged to ``gemini_model_complete``.
    """
    return await gemini_model_complete(
        prompt,
        system_prompt=system_prompt,
        # Build a fresh list per call instead of sharing one default object.
        history_messages=history_messages if history_messages is not None else [],
        api_key=os.getenv("GEMINI_API_KEY"),
        # Same model that initialize_rag() declares as llm_model_name.
        model_name="gemini-2.0-flash",
        **kwargs,
    )


@wrap_embedding_func_with_attrs(
    embedding_dim=768, max_token_size=2048, model_name="models/text-embedding-004"
)
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Wrapper for Gemini embedding model."""
    return await gemini_embed.func(
        texts, api_key=os.getenv("GEMINI_API_KEY"), model="models/text-embedding-004"
    )


async def initialize_rag(
    workspace: str = "default_workspace",
    entities=None,
) -> LightRAG:
    """
    Initializes a LightRAG instance with data isolation.

    Entity types are resolved in this order:
    - ``entities`` (if provided) overrides everything
    - else the ENTITY_TYPES env var is parsed as JSON
    - else DEFAULT_ENTITY_TYPES is used

    Raises:
        json.JSONDecodeError: If ENTITY_TYPES is set but is not valid JSON.
    """
    if entities is not None:
        entity_types = entities
    else:
        env_entities = os.getenv("ENTITY_TYPES")
        entity_types = json.loads(env_entities) if env_entities else DEFAULT_ENTITY_TYPES

    rag = LightRAG(
        workspace=workspace,
        llm_model_name="gemini-2.0-flash",
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
        embedding_func_max_async=4,
        embedding_batch_num=8,
        llm_model_max_async=2,
        addon_params={"entity_types": entity_types},
    )
    await rag.initialize_storages()
    return rag


async def _index_file(rag, path, label):
    """Insert the text file at ``path`` into ``rag``; report it when missing."""
    if not os.path.exists(path):
        # Make the skip visible: the later query would otherwise run against
        # a workspace that never received this document.
        print(f"Skipping {path}: file not found, {label} Workspace was not updated.")
        return
    with open(path, "r", encoding="utf-8") as f:
        print(f"Indexing {path} into {label} Workspace...")
        await rag.ainsert(f.read())


async def main():
    """Build two isolated workspaces, index one file into each, query both."""
    rag_1 = None
    rag_2 = None
    try:
        # 1. Initialize Isolated Workspaces
        # Instance 1: Dedicated to literary analysis
        # Instance 2: Dedicated to corporate HR documentation
        print("Initializing isolated LightRAG workspaces...")
        rag_1 = await initialize_rag("rag_workspace_book")
        rag_2 = await initialize_rag("rag_workspace_hr")

        # 2. Populate Workspace 1 (Literature)
        await _index_file(rag_1, "Data/book-small.txt", "Literature")

        # 3. Populate Workspace 2 (Corporate)
        await _index_file(rag_2, "Data/HR_policies.txt", "HR")

        # 4. Context-Specific Querying
        print("\n--- Querying Literature Workspace ---")
        res1 = await rag_1.aquery(
            "What is the main theme?",
            param=QueryParam(mode="hybrid", stream=False),
        )
        print(f"Book Analysis: {res1[:200]}...")

        print("\n--- Querying HR Workspace ---")
        res2 = await rag_2.aquery(
            "What is the leave policy?", param=QueryParam(mode="hybrid")
        )
        print(f"HR Response: {res2[:200]}...")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Finalize storage to safely close DB connections and write buffers
        if rag_1 is not None:
            await rag_1.finalize_storages()
        if rag_2 is not None:
            await rag_2.finalize_storages()


if __name__ == "__main__":
    asyncio.run(main())
def configure_logging():
    """Configure logging for the application.

    Environment variables read: ``LOG_DIR`` (default: current directory),
    ``LOG_MAX_BYTES`` (default 10 MB), ``LOG_BACKUP_COUNT`` (default 5) and
    ``VERBOSE_DEBUG``.
    """
    # Reset any existing handlers to ensure clean configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_ollama_demo.log"))

    print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
    # The file handler below cannot create missing directories itself.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Set the logger level to INFO
    logger.setLevel(logging.INFO)
    # Enable verbose debug if needed
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")


async def initialize_rag():
    """Build a LightRAG instance backed by Ollama for both LLM and embeddings.

    Model names, hosts, timeout and embedding sizes come from environment
    variables, with the defaults shown in the calls below.
    """
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=ollama_model_complete,
        llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
        summary_max_tokens=8192,
        llm_model_kwargs={
            "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
            "options": {"num_ctx": 8192},
            "timeout": int(os.getenv("TIMEOUT", "300")),
        },
        # Note: ollama_embed is decorated with @wrap_embedding_func_with_attrs,
        # which wraps it in an EmbeddingFunc. Using .func accesses the original
        # unwrapped function to avoid double wrapping when we create our own
        # EmbeddingFunc with custom configuration (embedding_dim, max_token_size).
        embedding_func=EmbeddingFunc(
            embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
            max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")),
            func=partial(
                ollama_embed.func,  # Access the unwrapped function to avoid double EmbeddingFunc wrapping
                embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"),
                host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"),
            ),
        ),
    )

    await rag.initialize_storages()  # Auto-initializes pipeline_status
    return rag


async def print_stream(stream):
    """Print every chunk of an async iterator as it arrives, without newlines."""
    async for chunk in stream:
        print(chunk, end="", flush=True)


async def main():
    """Rebuild the index from ``./book.txt`` and query it in every mode.

    Errors are printed rather than raised; storages are flushed and finalized
    only if the RAG instance was actually created.
    """
    # Bind the name up front: the ``finally`` clause reads it even when the
    # failure happened before initialize_rag() returned.
    rag = None
    try:
        # Clear old data files
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]

        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"Deleting old file:: {file_path}")

        # Initialize RAG instance
        rag = await initialize_rag()

        # Test embedding function
        test_text = ["This is a test string for embedding."]
        embedding = await rag.embedding_func(test_text)
        embedding_dim = embedding.shape[1]
        print("\n=======================")
        print("Test embedding function")
        print("========================")
        print(f"Test dict: {test_text}")
        print(f"Detected embedding dimension: {embedding_dim}\n\n")

        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Run the same question through each retrieval mode, streaming the
        # answer when the backend returns an async generator.
        for mode in ("naive", "local", "global", "hybrid"):
            print("\n=====================")
            print(f"Query mode: {mode}")
            print("=====================")
            resp = await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode=mode, stream=True),
            )
            if inspect.isasyncgen(resp):
                await print_stream(resp)
            else:
                print(resp)

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.llm_response_cache.index_done_callback()
            await rag.finalize_storages()


if __name__ == "__main__":
    # Configure logging before running the main function
    configure_logging()
    asyncio.run(main())
    print("\nDone!")
def configure_logging():
    """Configure logging for the application.

    Environment variables read: ``LOG_DIR`` (default: current directory),
    ``LOG_MAX_BYTES`` (default 10 MB), ``LOG_BACKUP_COUNT`` (default 5) and
    ``VERBOSE_DEBUG``.
    """
    # Reset any existing handlers to ensure clean configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(
        os.path.join(log_dir, "lightrag_compatible_demo.log")
    )

    print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
    # Create the directory that will hold the log file (i.e. LOG_DIR itself);
    # the file handler below cannot create missing directories.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Set the logger level to INFO
    logger.setLevel(logging.INFO)
    # Enable verbose debug if needed
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")


async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` through an OpenAI-compatible endpoint.

    The model, base URL and API key come from ``LLM_MODEL``,
    ``LLM_BINDING_HOST`` and ``LLM_BINDING_API_KEY`` (falling back to
    ``OPENAI_API_KEY``). ``keyword_extraction`` is accepted but not forwarded.
    """
    return await openai_complete_if_cache(
        os.getenv("LLM_MODEL", "deepseek-chat"),
        prompt,
        system_prompt=system_prompt,
        # Build a fresh list per call instead of sharing one default object.
        history_messages=history_messages if history_messages is not None else [],
        api_key=os.getenv("LLM_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("LLM_BINDING_HOST", "https://api.deepseek.com"),
        **kwargs,
    )


async def print_stream(stream):
    """Print the non-empty chunks of an async iterator as they arrive."""
    async for chunk in stream:
        if chunk:
            print(chunk, end="", flush=True)


async def initialize_rag():
    """Build a LightRAG instance: OpenAI-compatible LLM, Ollama embeddings."""
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        # Note: ollama_embed is decorated with @wrap_embedding_func_with_attrs,
        # which wraps it in an EmbeddingFunc. Using .func accesses the original
        # unwrapped function to avoid double wrapping when we create our own
        # EmbeddingFunc with custom configuration (embedding_dim, max_token_size).
        embedding_func=EmbeddingFunc(
            embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
            max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")),
            func=partial(
                ollama_embed.func,  # Access the unwrapped function to avoid double EmbeddingFunc wrapping
                embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"),
                host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"),
            ),
        ),
    )

    await rag.initialize_storages()  # Auto-initializes pipeline_status
    return rag


async def main():
    """Rebuild the index from ``./book.txt`` and query it in every mode.

    Errors are printed rather than raised; storages are finalized only if the
    RAG instance was actually created.
    """
    # Bind the name up front: the ``finally`` clause reads it even when the
    # failure happened before initialize_rag() returned.
    rag = None
    try:
        # Clear old data files
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]

        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"Deleting old file:: {file_path}")

        # Initialize RAG instance
        rag = await initialize_rag()

        # Test embedding function
        test_text = ["This is a test string for embedding."]
        embedding = await rag.embedding_func(test_text)
        embedding_dim = embedding.shape[1]
        print("\n=======================")
        print("Test embedding function")
        print("========================")
        print(f"Test dict: {test_text}")
        print(f"Detected embedding dimension: {embedding_dim}\n\n")

        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Run the same question through each retrieval mode, streaming the
        # answer when the backend returns an async generator.
        for mode in ("naive", "local", "global", "hybrid"):
            print("\n=====================")
            print(f"Query mode: {mode}")
            print("=====================")
            resp = await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode=mode, stream=True),
            )
            if inspect.isasyncgen(resp):
                await print_stream(resp)
            else:
                print(resp)

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    # Configure logging before running the main function
    configure_logging()
    asyncio.run(main())
    print("\nDone!")
def configure_logging():
    """Configure logging for the application.

    Environment variables read: ``LOG_DIR`` (default: current directory),
    ``LOG_MAX_BYTES`` (default 10 MB), ``LOG_BACKUP_COUNT`` (default 5) and
    ``VERBOSE_DEBUG``.
    """
    # Reset any existing handlers to ensure clean configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_demo.log"))

    print(f"\nLightRAG demo log file: {log_file_path}\n")
    # Create the directory that will hold the log file (i.e. LOG_DIR itself);
    # the file handler below cannot create missing directories.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Set the logger level to INFO
    logger.setLevel(logging.INFO)
    # Enable verbose debug if needed
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")


async def initialize_rag():
    """Build a LightRAG instance using the OpenAI LLM and embedding helpers."""
    rag = LightRAG(
        working_dir=WORKING_DIR,
        embedding_func=openai_embed,
        llm_model_func=gpt_4o_mini_complete,
    )

    await rag.initialize_storages()  # Auto-initializes pipeline_status
    return rag


async def main():
    """Rebuild the index from ``./book.txt`` and query it in every mode.

    Returns early with a hint when ``OPENAI_API_KEY`` is not set. Other errors
    are printed rather than raised; storages are finalized only if the RAG
    instance was actually created.
    """
    # Check if OPENAI_API_KEY environment variable exists
    if not os.getenv("OPENAI_API_KEY"):
        print(
            "Error: OPENAI_API_KEY environment variable is not set. Please set this variable before running the program."
        )
        print("You can set the environment variable by running:")
        print(" export OPENAI_API_KEY='your-openai-api-key'")
        return  # Exit the async function

    # Bind the name up front: the ``finally`` clause reads it even when the
    # failure happened before initialize_rag() returned.
    rag = None
    try:
        # Clear old data files
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]

        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"Deleting old file:: {file_path}")

        # Initialize RAG instance
        rag = await initialize_rag()

        # Test embedding function
        test_text = ["This is a test string for embedding."]
        embedding = await rag.embedding_func(test_text)
        embedding_dim = embedding.shape[1]
        print("\n=======================")
        print("Test embedding function")
        print("========================")
        print(f"Test dict: {test_text}")
        print(f"Detected embedding dimension: {embedding_dim}\n\n")

        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Run the same question through each retrieval mode.
        for mode in ("naive", "local", "global", "hybrid"):
            print("\n=====================")
            print(f"Query mode: {mode}")
            print("=====================")
            print(
                await rag.aquery(
                    "What are the top themes in this story?",
                    param=QueryParam(mode=mode),
                )
            )
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    # Configure logging before running the main function
    configure_logging()
    asyncio.run(main())
    print("\nDone!")
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the OpenAI model named by ``EMBEDDING_MODEL``."""
    # Note: openai_embed is decorated with @wrap_embedding_func_with_attrs,
    # which wraps it in an EmbeddingFunc. Using .func accesses the original
    # unwrapped function to avoid double wrapping when we create our own
    # EmbeddingFunc with custom configuration in create_embedding_function_instance().
    return await openai_embed.func(
        texts,
        model=EMBEDDING_MODEL,
    )


async def get_embedding_dimension():
    """Return the embedding width, measured by embedding one probe sentence."""
    test_text = ["This is a test sentence."]
    embedding = await embedding_func(test_text)
    return embedding.shape[1]


async def create_embedding_function_instance():
    """Wrap ``embedding_func`` in an ``EmbeddingFunc`` with the probed dimension."""
    # Get embedding dimension
    embedding_dimension = await get_embedding_dimension()
    # Create embedding function instance
    return EmbeddingFunc(
        embedding_dim=embedding_dimension,
        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
        func=embedding_func,
    )


async def initialize_rag():
    """Create a LightRAG instance that uses ``MongoGraphStorage`` for the graph."""
    embedding_func_instance = await create_embedding_function_instance()

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=embedding_func_instance,
        graph_storage="MongoGraphStorage",
        log_level="DEBUG",
    )

    await rag.initialize_storages()  # Auto-initializes pipeline_status
    return rag


async def _run_demo():
    """Index ``./book.txt`` and print one answer per retrieval mode."""
    # Initialize RAG instance
    rag = await initialize_rag()
    try:
        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Perform naive, local, global and hybrid search in turn
        for mode in ("naive", "local", "global", "hybrid"):
            print(
                await rag.aquery(
                    "What are the top themes in this story?",
                    param=QueryParam(mode=mode),
                )
            )
    finally:
        # Release storage connections even when indexing or a query fails.
        await rag.finalize_storages()


def main():
    """Synchronous entry point for the demo.

    asyncio.run() closes its event loop when it returns, so initialization,
    indexing and querying all run inside one asyncio.run() call; that keeps
    the storages on the loop they were initialized on.
    """
    asyncio.run(_run_demo())


if __name__ == "__main__":
    main()
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the OpenAI model named by ``EMBEDDING_MODEL``."""
    return await openai_embed.func(
        texts,
        model=EMBEDDING_MODEL,
    )


async def get_embedding_dimension():
    """Return the embedding width, measured by embedding one probe sentence."""
    test_text = ["This is a test sentence."]
    embedding = await embedding_func(test_text)
    return embedding.shape[1]


async def create_embedding_function_instance():
    """Wrap ``embedding_func`` in an ``EmbeddingFunc`` with the probed dimension."""
    embedding_dimension = await get_embedding_dimension()
    return EmbeddingFunc(
        embedding_dim=embedding_dimension,
        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
        func=embedding_func,
    )


# --------------------------------------------------
# Initialize RAG with OpenSearch storages
# --------------------------------------------------
async def initialize_rag() -> LightRAG:
    """Create an OpenSearch-backed LightRAG instance and clear its old data.

    WARNING: every storage listed below is dropped on each call, so callers
    should validate their inputs before calling this function.
    """
    embedding_func_instance = await create_embedding_function_instance()

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=embedding_func_instance,
        # OpenSearch-backed storages
        kv_storage="OpenSearchKVStorage",
        doc_status_storage="OpenSearchDocStatusStorage",
        graph_storage="OpenSearchGraphStorage",
        vector_storage="OpenSearchVectorDBStorage",
    )

    # REQUIRED: initialize all storage backends
    await rag.initialize_storages()

    # Clean previous data so the example is re-runnable
    # (LLM response cache is preserved for faster reruns)
    for storage in [
        rag.full_docs,
        rag.text_chunks,
        rag.full_entities,
        rag.full_relations,
        rag.entity_chunks,
        rag.relation_chunks,
        rag.entities_vdb,
        rag.relationships_vdb,
        rag.chunks_vdb,
        rag.chunk_entity_relation_graph,
        rag.doc_status,
    ]:
        await storage.drop()
    print("Cleared previous data.")

    return rag


# --------------------------------------------------
# Main
# --------------------------------------------------
async def main():
    """Index ``BOOK_FILE`` and run one sample query per retrieval mode.

    Any exception is reported with a traceback instead of being re-raised, and
    storages are finalized whenever initialization succeeded.
    """
    rag = None
    try:
        # Check the input BEFORE initialize_rag(): that call drops all
        # previously indexed data, which would be lost for nothing if there
        # is no document to re-index.
        if not os.path.exists(BOOK_FILE):
            raise FileNotFoundError(
                f"'{BOOK_FILE}' not found. Please provide a text file to index."
            )

        print("Initializing LightRAG with OpenSearch + OpenAI...")
        rag = await initialize_rag()

        print(f"\nReading document: {BOOK_FILE}")
        with open(BOOK_FILE, "r", encoding="utf-8") as f:
            content = f.read()
        print(f"Loaded document ({len(content)} characters)")

        print("\nInserting document into LightRAG (this may take some time)...")
        await rag.ainsert(content)
        print("Document indexed successfully!")

        print("\n" + "=" * 60)
        print("Running sample queries")
        print("=" * 60)

        query = "What are the top themes in this document?"
        for mode in ["naive", "local", "global", "hybrid"]:
            print(f"\n[{mode.upper()} MODE]")
            result = await rag.aquery(query, param=QueryParam(mode=mode))
            print(result)

        print("\nRAG system is ready for use!")
    except Exception as e:
        print("An error occurred:", e)
        import traceback

        traceback.print_exc()
    finally:
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    asyncio.run(main())
Usage: python examples/lightrag_vllm_demo.py """ import os import asyncio from functools import partial from dotenv import load_dotenv from lightrag import LightRAG, QueryParam from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc from lightrag.rerank import jina_rerank load_dotenv() # -------------------------------------------------- # Constants # -------------------------------------------------- WORKING_DIR = "./LightRAG_Data" BOOK_FILE = "Data/book-small.txt" # -------------------------------------------------- # LLM function (vLLM, OpenAI-compatible) # -------------------------------------------------- async def llm_model_func( prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: return await openai_complete_if_cache( model=os.getenv("LLM_MODEL", "Qwen/Qwen3-14B-AWQ"), prompt=prompt, system_prompt=system_prompt, history_messages=history_messages, base_url=os.getenv("LLM_BINDING_HOST", "http://0.0.0.0:4646/v1"), api_key=os.getenv("LLM_BINDING_API_KEY", "not_needed"), timeout=600, **kwargs, ) # -------------------------------------------------- # Embedding function (vLLM) # -------------------------------------------------- vLLM_emb_func = EmbeddingFunc( model_name=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"), send_dimensions=False, embedding_dim=int(os.getenv("EMBEDDING_DIM", 1024)), max_token_size=int(os.getenv("EMBEDDING_TOKEN_LIMIT", 4096)), func=partial( openai_embed.func, model=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"), base_url=os.getenv( "EMBEDDING_BINDING_HOST", "http://0.0.0.0:1234/v1", ), api_key=os.getenv("EMBEDDING_BINDING_API_KEY", "not_needed"), ), ) # -------------------------------------------------- # Reranker (Jina-compatible, vLLM-served) # -------------------------------------------------- jina_rerank_model_func = partial( jina_rerank, model=os.getenv("RERANK_MODEL", "Qwen/Qwen3-Reranker-0.6B"), api_key=os.getenv("RERANK_BINDING_API_KEY"), 
base_url=os.getenv( "RERANK_BINDING_HOST", "http://0.0.0.0:3535/v1/rerank", ), ) # -------------------------------------------------- # Initialize RAG # -------------------------------------------------- async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=vLLM_emb_func, rerank_model_func=jina_rerank_model_func, # Storage backends (configurable via environment or modify here) kv_storage=os.getenv("KV_STORAGE", "PGKVStorage"), doc_status_storage=os.getenv("DOC_STATUS_STORAGE", "PGDocStatusStorage"), vector_storage=os.getenv("VECTOR_STORAGE", "PGVectorStorage"), graph_storage=os.getenv("GRAPH_STORAGE", "Neo4JStorage"), ) await rag.initialize_storages() return rag # -------------------------------------------------- # Main # -------------------------------------------------- async def main(): rag = None try: # Validate book file exists if not os.path.exists(BOOK_FILE): raise FileNotFoundError( f"'{BOOK_FILE}' not found. Please provide a text file to index." ) rag = await initialize_rag() # -------------------------------------------------- # Data Ingestion # -------------------------------------------------- print(f"Indexing {BOOK_FILE}...") with open(BOOK_FILE, "r", encoding="utf-8") as f: await rag.ainsert(f.read()) print("Indexing complete.") # -------------------------------------------------- # Query # -------------------------------------------------- query = ( "What are the main themes of the book, and how do the key characters " "evolve throughout the story?" 
) print("\nHybrid Search with Reranking:") result = await rag.aquery( query, param=QueryParam( mode="hybrid", stream=False, enable_rerank=True, ), ) print("\nResult:\n", result) except Exception as e: print(f"An error occurred: {e}") finally: if rag: await rag.finalize_storages() if __name__ == "__main__": asyncio.run(main()) print("\nDone!") ================================================ FILE: examples/milvus_kwargs_configuration_demo.py ================================================ """ Example: Configuring Milvus Index Parameters via vector_db_storage_cls_kwargs This example demonstrates how to configure Milvus indexing parameters through vector_db_storage_cls_kwargs, which is the recommended approach when using frameworks that build on top of LightRAG (like RAGAnything). This approach allows configuration to be passed through framework layers without requiring environment variable changes or direct code modifications. """ import os import asyncio from lightrag import LightRAG, QueryParam from lightrag.llm.openai import openai_complete_if_cache, openai_embed async def main(): # Configure Milvus connection os.environ["MILVUS_URI"] = "http://localhost:19530" # os.environ["MILVUS_USER"] = "root" # os.environ["MILVUS_PASSWORD"] = "your_password" # os.environ["MILVUS_DB_NAME"] = "lightrag" # Initialize LightRAG with Milvus index configuration via vector_db_storage_cls_kwargs # This is the recommended approach for framework integration (e.g., RAGAnything) rag = LightRAG( working_dir="./demo_index", llm_model_func=openai_complete_if_cache, embedding_func=openai_embed, # Specify Milvus as the vector storage backend vector_storage="MilvusVectorDBStorage", # Configure Milvus indexing parameters via vector_db_storage_cls_kwargs # These parameters are extracted and passed to MilvusIndexConfig vector_db_storage_cls_kwargs={ # Required parameter for all vector storage backends "cosine_better_than_threshold": 0.2, # Milvus index configuration parameters # All of these can 
be configured via vector_db_storage_cls_kwargs # Index type (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.) "index_type": "HNSW", # Distance metric (COSINE, L2, IP) "metric_type": "COSINE", # HNSW parameters "hnsw_m": 32, # Number of connections per layer (2-2048) "hnsw_ef_construction": 256, # Size of dynamic candidate list during construction "hnsw_ef": 150, # Size of dynamic candidate list during search # IVF parameters (used when index_type is IVF_FLAT, IVF_SQ8, IVF_PQ) # "ivf_nlist": 2048, # Number of cluster units # "ivf_nprobe": 32, # Number of units to query # HNSW_SQ parameters (requires Milvus 2.6.8+) # "sq_type": "SQ8", # Quantization type (SQ4U, SQ6, SQ8, BF16, FP16) # "sq_refine": True, # Enable refinement # "sq_refine_type": "FP32", # Refinement type # "sq_refine_k": 20, # Number of candidates to refine }, ) # Initialize storage backends await rag.initialize_storages() print( "✅ LightRAG initialized with Milvus index configuration via vector_db_storage_cls_kwargs" ) print( f" Index Type: {rag.vector_db_storages['entities'].index_config.index_type}" ) print( f" Metric Type: {rag.vector_db_storages['entities'].index_config.metric_type}" ) print(f" HNSW M: {rag.vector_db_storages['entities'].index_config.hnsw_m}") print( f" HNSW EF Construction: {rag.vector_db_storages['entities'].index_config.hnsw_ef_construction}" ) print(f" HNSW EF: {rag.vector_db_storages['entities'].index_config.hnsw_ef}") # Example: Insert some text sample_text = """ LightRAG is a Retrieval-Augmented Generation framework that uses graph-based knowledge representation for enhanced information retrieval. It supports multiple vector storage backends including Milvus, which offers advanced indexing options for optimal performance. 
""" await rag.ainsert(sample_text) print("\n✅ Sample text inserted") # Example: Query with different modes result = await rag.aquery("What is LightRAG?", param=QueryParam(mode="hybrid")) print(f"\n✅ Query result: {result[:200]}...") # Cleanup await rag.finalize_storages() if __name__ == "__main__": print("=" * 80) print("Milvus Configuration via vector_db_storage_cls_kwargs Example") print("=" * 80) print() print("This example shows how to configure Milvus indexing parameters through") print("vector_db_storage_cls_kwargs, which is ideal for framework integration.") print() print("Key Benefits:") print(" • No environment variable changes required") print(" • Configuration can be passed through framework layers") print(" • Perfect for RAGAnything and similar frameworks") print(" • All 11 index parameters are supported") print() print("=" * 80) print() asyncio.run(main()) ================================================ FILE: examples/modalprocessors_example.py ================================================ """ Example of directly using modal processors This example demonstrates how to use LightRAG's modal processors directly without going through MinerU. 
""" import asyncio import argparse from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag import LightRAG from lightrag.utils import EmbeddingFunc from raganything.modalprocessors import ( ImageModalProcessor, TableModalProcessor, EquationModalProcessor, ) WORKING_DIR = "./rag_storage" def get_llm_model_func(api_key: str, base_url: str = None): return lambda prompt, system_prompt=None, history_messages=[], **kwargs: ( openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=api_key, base_url=base_url, **kwargs, ) ) def get_vision_model_func(api_key: str, base_url: str = None): return ( lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: ( openai_complete_if_cache( "gpt-4o", "", system_prompt=None, history_messages=[], messages=[ {"role": "system", "content": system_prompt} if system_prompt else None, { "role": "user", "content": [ {"type": "text", "text": prompt}, { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image_data}" }, }, ], } if image_data else {"role": "user", "content": prompt}, ], api_key=api_key, base_url=base_url, **kwargs, ) if image_data else openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=api_key, base_url=base_url, **kwargs, ) ) ) async def process_image_example(lightrag: LightRAG, vision_model_func): """Example of processing an image""" # Create image processor image_processor = ImageModalProcessor( lightrag=lightrag, modal_caption_func=vision_model_func ) # Prepare image content image_content = { "img_path": "image.jpg", "img_caption": ["Example image caption"], "img_footnote": ["Example image footnote"], } # Process image description, entity_info = await image_processor.process_multimodal_content( modal_content=image_content, content_type="image", file_path="image_example.jpg", entity_name="Example Image", ) print("Image 
Processing Results:") print(f"Description: {description}") print(f"Entity Info: {entity_info}") async def process_table_example(lightrag: LightRAG, llm_model_func): """Example of processing a table""" # Create table processor table_processor = TableModalProcessor( lightrag=lightrag, modal_caption_func=llm_model_func ) # Prepare table content table_content = { "table_body": """ | Name | Age | Occupation | |------|-----|------------| | John | 25 | Engineer | | Mary | 30 | Designer | """, "table_caption": ["Employee Information Table"], "table_footnote": ["Data updated as of 2024"], } # Process table description, entity_info = await table_processor.process_multimodal_content( modal_content=table_content, content_type="table", file_path="table_example.md", entity_name="Employee Table", ) print("\nTable Processing Results:") print(f"Description: {description}") print(f"Entity Info: {entity_info}") async def process_equation_example(lightrag: LightRAG, llm_model_func): """Example of processing a mathematical equation""" # Create equation processor equation_processor = EquationModalProcessor( lightrag=lightrag, modal_caption_func=llm_model_func ) # Prepare equation content equation_content = {"text": "E = mc^2", "text_format": "LaTeX"} # Process equation description, entity_info = await equation_processor.process_multimodal_content( modal_content=equation_content, content_type="equation", file_path="equation_example.txt", entity_name="Mass-Energy Equivalence", ) print("\nEquation Processing Results:") print(f"Description: {description}") print(f"Entity Info: {entity_info}") async def initialize_rag(api_key: str, base_url: str = None): rag = LightRAG( working_dir=WORKING_DIR, embedding_func=EmbeddingFunc( embedding_dim=3072, max_token_size=8192, func=lambda texts: openai_embed( texts, model="text-embedding-3-large", api_key=api_key, base_url=base_url, ), ), llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: ( openai_complete_if_cache( 
"gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=api_key, base_url=base_url, **kwargs, ) ), ) await rag.initialize_storages() # Auto-initializes pipeline_status return rag def main(): """Main function to run the example""" parser = argparse.ArgumentParser(description="Modal Processors Example") parser.add_argument("--api-key", required=True, help="OpenAI API key") parser.add_argument("--base-url", help="Optional base URL for API") parser.add_argument( "--working-dir", "-w", default=WORKING_DIR, help="Working directory path" ) args = parser.parse_args() # Run examples asyncio.run(main_async(args.api_key, args.base_url)) async def main_async(api_key: str, base_url: str = None): # Initialize LightRAG lightrag = await initialize_rag(api_key, base_url) # Get model functions llm_model_func = get_llm_model_func(api_key, base_url) vision_model_func = get_vision_model_func(api_key, base_url) # Run examples await process_image_example(lightrag, vision_model_func) await process_table_example(lightrag, llm_model_func) await process_equation_example(lightrag, llm_model_func) if __name__ == "__main__": main() ================================================ FILE: examples/opensearch_storage_demo.py ================================================ """ Integration test for OpenSearch Storage in LightRAG. Tests all 4 storage types against a live OpenSearch cluster: - KV Storage: CRUD, filter_keys - DocStatus Storage: CRUD, pagination (PIT + search_after), status counts - Graph Storage: nodes, edges, BFS traversal, search_labels - Vector Storage: k-NN upsert, query, get/delete Prerequisites: OpenSearch cluster running with k-NN plugin enabled. 
Set env vars: OPENSEARCH_HOSTS, OPENSEARCH_USER, OPENSEARCH_PASSWORD, OPENSEARCH_USE_SSL, OPENSEARCH_VERIFY_CERTS Usage: OPENSEARCH_HOSTS=localhost:9200 OPENSEARCH_USER=admin \ OPENSEARCH_PASSWORD= OPENSEARCH_USE_SSL=true \ OPENSEARCH_VERIFY_CERTS=false python examples/opensearch_storage_demo.py """ import asyncio import numpy as np from lightrag.kg.opensearch_impl import ( OpenSearchKVStorage, OpenSearchDocStatusStorage, OpenSearchGraphStorage, OpenSearchVectorDBStorage, ClientManager, ) from lightrag.kg.shared_storage import initialize_share_data from lightrag.base import DocStatus class MockEmbeddingFunc: """Mock embedding function for testing.""" def __init__(self, dim=128): self.embedding_dim = dim self.max_token_size = 512 self.model_name = "mock-embedding" async def __call__(self, texts, **kwargs): return np.random.rand(len(texts), self.embedding_dim).astype(np.float32) CONFIG = { "embedding_batch_num": 10, "max_graph_nodes": 1000, "vector_db_storage_cls_kwargs": {"cosine_better_than_threshold": 0.2}, } EMBED = MockEmbeddingFunc() PASSED = 0 FAILED = 0 def check(condition, msg): global PASSED, FAILED if condition: print(f" ✓ {msg}") PASSED += 1 else: print(f" ✗ {msg}") FAILED += 1 async def test_connection_manager(): print("\n=== Connection Manager ===") client1 = await ClientManager.get_client() client2 = await ClientManager.get_client() check(client1 is client2, "Singleton pattern (same instance)") await ClientManager.release_client(client1) await ClientManager.release_client(client2) check(True, "Released clients") async def test_kv_storage(): print("\n=== KV Storage ===") s = OpenSearchKVStorage( namespace="integ_kv", global_config=CONFIG, embedding_func=EMBED, workspace="integ", ) await s.initialize() try: await s.upsert({"k1": {"content": "hello"}, "k2": {"content": "world"}}) await s.index_done_callback() doc = await s.get_by_id("k1") check(doc is not None and doc.get("content") == "hello", "get_by_id") docs = await s.get_by_ids(["k1", "k2", 
"missing"]) check(docs[0] is not None and docs[2] is None, "get_by_ids preserves order") missing = await s.filter_keys({"k1", "k99"}) check(missing == {"k99"}, f"filter_keys: {missing}") check(not await s.is_empty(), "is_empty=False") await s.delete(["k2"]) await s.index_done_callback() check(await s.get_by_id("k2") is None, "delete + verify") finally: await s.drop() await s.finalize() async def test_doc_status_storage(): print("\n=== DocStatus Storage ===") s = OpenSearchDocStatusStorage( namespace="integ_ds", global_config=CONFIG, embedding_func=EMBED, workspace="integ", ) await s.initialize() try: # Insert docs await s.upsert( { f"d{i}": { "status": "processed" if i % 2 == 0 else "pending", "file_path": f"/file{i}.txt", "content_summary": f"summary {i}", "content_length": i * 10, "chunks_count": i, "created_at": 1000 + i, "updated_at": 2000 + i, } for i in range(20) } ) await s.index_done_callback() # Status counts counts = await s.get_all_status_counts() check(counts.get("all") == 20, f"all_status_counts: {counts}") check( counts.get("processed") == 10, f"processed count: {counts.get('processed')}" ) # get_docs_by_status (uses PIT + search_after) processed = await s.get_docs_by_status(DocStatus.PROCESSED) check(len(processed) == 10, f"get_docs_by_status(processed): {len(processed)}") # get_docs_by_track_id (uses PIT + search_after) await s.upsert( { "tracked1": { "status": "processed", "file_path": "/t.txt", "content_summary": "s", "content_length": 1, "chunks_count": 1, "created_at": 100, "updated_at": 200, "track_id": "batch-42", } } ) await s.index_done_callback() tracked = await s.get_docs_by_track_id("batch-42") check(len(tracked) == 1, f"get_docs_by_track_id: {len(tracked)}") # Paginated (uses PIT + search_after) page1, total = await s.get_docs_paginated(page=1, page_size=10) check(total == 21, f"paginated total: {total}") check(len(page1) == 10, f"page1 size: {len(page1)}") page2, _ = await s.get_docs_paginated(page=2, page_size=10) check(len(page2) == 
10, f"page2 size: {len(page2)}") page3, _ = await s.get_docs_paginated(page=3, page_size=10) check(len(page3) == 1, f"page3 size: {len(page3)}") # With status filter filtered, ftotal = await s.get_docs_paginated( status_filter=DocStatus.PENDING, page=1, page_size=50 ) check(ftotal == 10, f"filtered total: {ftotal}") # get_doc_by_file_path doc = await s.get_doc_by_file_path("/file0.txt") check(doc is not None and doc["_id"] == "d0", "get_doc_by_file_path") finally: await s.drop() await s.finalize() async def test_graph_storage(): print("\n=== Graph Storage ===") s = OpenSearchGraphStorage( namespace="integ_graph", global_config=CONFIG, embedding_func=EMBED, workspace="integ", ) await s.initialize() try: # Upsert nodes and edges await s.upsert_node( "Alice", {"entity_type": "person", "description": "A researcher"} ) await s.upsert_node( "Bob", {"entity_type": "person", "description": "A developer"} ) await s.upsert_node( "Quantum", {"entity_type": "topic", "description": "Quantum computing"} ) await s.upsert_edge( "Alice", "Bob", {"relationship": "knows", "weight": "1.0", "keywords": "collab"}, ) await s.upsert_edge( "Alice", "Quantum", {"relationship": "researches", "weight": "2.0", "keywords": "research"}, ) await s.upsert_edge( "Bob", "Quantum", {"relationship": "studies", "weight": "0.5", "keywords": "learning"}, ) await s.index_done_callback() check(await s.has_node("Alice"), "has_node(Alice)") check(not await s.has_node("Nobody"), "has_node(Nobody)=False") check(await s.has_edge("Alice", "Bob"), "has_edge(Alice,Bob)") node = await s.get_node("Alice") check(node is not None and node.get("entity_type") == "person", "get_node") check(node.get("entity_id") == "Alice", "entity_id field present") check( await s.node_degree("Alice") == 2, f"node_degree(Alice)={await s.node_degree('Alice')}", ) edges = await s.get_node_edges("Alice") check(len(edges) == 2, f"get_node_edges: {len(edges)}") # Batch ops batch = await s.get_nodes_batch(["Alice", "Bob", "Missing"]) 
check("Alice" in batch and "Missing" not in batch, "get_nodes_batch") degrees = await s.node_degrees_batch(["Alice", "Bob", "Quantum"]) check(degrees.get("Alice") == 2, f"node_degrees_batch: {degrees}") # Knowledge graph (BFS) kg = await s.get_knowledge_graph("Alice", max_depth=2) check(len(kg.nodes) == 3, f"BFS nodes: {len(kg.nodes)}") check(len(kg.edges) == 3, f"BFS edges: {len(kg.edges)}") # get_all_labels (uses PIT) labels = await s.get_all_labels() check("Alice" in labels and "Bob" in labels, f"get_all_labels: {labels}") # get_all_nodes (uses PIT) all_nodes = await s.get_all_nodes() check(len(all_nodes) == 3, f"get_all_nodes: {len(all_nodes)}") # get_all_edges (uses PIT) all_edges = await s.get_all_edges() check(len(all_edges) == 3, f"get_all_edges: {len(all_edges)}") # search_labels found = await s.search_labels("ali", limit=10) check("Alice" in found, f"search_labels('ali'): {found}") # popular_labels popular = await s.get_popular_labels(limit=10) check(len(popular) > 0, f"get_popular_labels: {popular}") # Delete node (cascading) await s.delete_node("Bob") await s.index_done_callback() check(not await s.has_node("Bob"), "delete_node cascade") check(not await s.has_edge("Alice", "Bob"), "edges removed after delete_node") print(f" (PPL graphlookup: {s._ppl_graphlookup_available})") finally: await s.drop() await s.finalize() async def test_vector_storage(): print("\n=== Vector Storage ===") s = OpenSearchVectorDBStorage( namespace="integ_vec", global_config=CONFIG, embedding_func=EMBED, workspace="integ", meta_fields={"content", "entity_name"}, ) await s.initialize() try: await s.upsert( { "v1": {"content": "apple fruit"}, "v2": {"content": "banana fruit"}, "v3": {"content": "quantum physics"}, } ) await s.index_done_callback() results = await s.query("apple", top_k=3) check(len(results) > 0, f"query returned {len(results)} results") check(all("distance" in r for r in results), "results have distance") doc = await s.get_by_id("v1") check(doc is not None and 
doc["id"] == "v1", "get_by_id") docs = await s.get_by_ids(["v1", "v2", "missing"]) check(docs[0] is not None and docs[2] is None, "get_by_ids") vecs = await s.get_vectors_by_ids(["v1"]) check("v1" in vecs and len(vecs["v1"]) == 128, "get_vectors_by_ids") await s.delete(["v3"]) await s.index_done_callback() check(await s.get_by_id("v3") is None, "delete + verify") finally: await s.drop() await s.finalize() async def main(): print("=" * 60) print("OpenSearch Storage Integration Tests") print("=" * 60) initialize_share_data(workers=1) try: await test_connection_manager() await test_kv_storage() await test_doc_status_storage() await test_graph_storage() await test_vector_storage() except Exception as e: print(f"\n✗ Fatal error: {e}") import traceback traceback.print_exc() print(f"\n{'=' * 60}") print(f"Results: {PASSED} passed, {FAILED} failed") print(f"{'=' * 60}") if FAILED > 0: exit(1) if __name__ == "__main__": asyncio.run(main()) ================================================ FILE: examples/raganything_example.py ================================================ #!/usr/bin/env python """ Example script demonstrating the integration of MinerU parser with RAGAnything This example shows how to: 1. Process parsed documents with RAGAnything 2. Perform multimodal queries on the processed documents 3. 
Handle different types of content (text, images, tables) """ import os import argparse import asyncio import logging import logging.config from pathlib import Path # Add project root directory to Python path import sys sys.path.append(str(Path(__file__).parent.parent)) from lightrag.llm.openai import openai_complete_if_cache, openai_embed from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug from raganything import RAGAnything, RAGAnythingConfig def configure_logging(): """Configure logging for the application""" # Get log directory path from environment variable or use current directory log_dir = os.getenv("LOG_DIR", os.getcwd()) log_file_path = os.path.abspath(os.path.join(log_dir, "raganything_example.log")) print(f"\nRAGAnything example log file: {log_file_path}\n") os.makedirs(os.path.dirname(log_dir), exist_ok=True) # Get log file max size and backup count from environment variables log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups logging.config.dictConfig( { "version": 1, "disable_existing_loggers": False, "formatters": { "default": { "format": "%(levelname)s: %(message)s", }, "detailed": { "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", }, }, "handlers": { "console": { "formatter": "default", "class": "logging.StreamHandler", "stream": "ext://sys.stderr", }, "file": { "formatter": "detailed", "class": "logging.handlers.RotatingFileHandler", "filename": log_file_path, "maxBytes": log_max_bytes, "backupCount": log_backup_count, "encoding": "utf-8", }, }, "loggers": { "lightrag": { "handlers": ["console", "file"], "level": "INFO", "propagate": False, }, }, } ) # Set the logger level to INFO logger.setLevel(logging.INFO) # Enable verbose debug if needed set_verbose_debug(os.getenv("VERBOSE", "false").lower() == "true") async def process_with_rag( file_path: str, output_dir: str, api_key: str, base_url: str = None, working_dir: str 
= None, ): """ Process document with RAGAnything Args: file_path: Path to the document output_dir: Output directory for RAG results api_key: OpenAI API key base_url: Optional base URL for API working_dir: Working directory for RAG storage """ try: # Create RAGAnything configuration config = RAGAnythingConfig( working_dir=working_dir or "./rag_storage", mineru_parse_method="auto", enable_image_processing=True, enable_table_processing=True, enable_equation_processing=True, ) # Define LLM model function def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): return openai_complete_if_cache( "gpt-4o-mini", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=api_key, base_url=base_url, **kwargs, ) # Define vision model function for image processing def vision_model_func( prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs ): if image_data: return openai_complete_if_cache( "gpt-4o", "", system_prompt=None, history_messages=[], messages=[ {"role": "system", "content": system_prompt} if system_prompt else None, { "role": "user", "content": [ {"type": "text", "text": prompt}, { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{image_data}" }, }, ], } if image_data else {"role": "user", "content": prompt}, ], api_key=api_key, base_url=base_url, **kwargs, ) else: return llm_model_func(prompt, system_prompt, history_messages, **kwargs) # Define embedding function embedding_func = EmbeddingFunc( embedding_dim=3072, max_token_size=8192, func=lambda texts: openai_embed( texts, model="text-embedding-3-large", api_key=api_key, base_url=base_url, ), ) # Initialize RAGAnything with new dataclass structure rag = RAGAnything( config=config, llm_model_func=llm_model_func, vision_model_func=vision_model_func, embedding_func=embedding_func, ) # Process document await rag.process_document_complete( file_path=file_path, output_dir=output_dir, parse_method="auto" ) # Example queries - demonstrating 
different query approaches logger.info("\nQuerying processed document:") # 1. Pure text queries using aquery() text_queries = [ "What is the main content of the document?", "What are the key topics discussed?", ] for query in text_queries: logger.info(f"\n[Text Query]: {query}") result = await rag.aquery(query, mode="hybrid") logger.info(f"Answer: {result}") # 2. Multimodal query with specific multimodal content using aquery_with_multimodal() logger.info( "\n[Multimodal Query]: Analyzing performance data in context of document" ) multimodal_result = await rag.aquery_with_multimodal( "Compare this performance data with any similar results mentioned in the document", multimodal_content=[ { "type": "table", "table_data": """Method,Accuracy,Processing_Time RAGAnything,95.2%,120ms Traditional_RAG,87.3%,180ms Baseline,82.1%,200ms""", "table_caption": "Performance comparison results", } ], mode="hybrid", ) logger.info(f"Answer: {multimodal_result}") # 3. Another multimodal query with equation content logger.info("\n[Multimodal Query]: Mathematical formula analysis") equation_result = await rag.aquery_with_multimodal( "Explain this formula and relate it to any mathematical concepts in the document", multimodal_content=[ { "type": "equation", "latex": "F1 = 2 \\cdot \\frac{precision \\cdot recall}{precision + recall}", "equation_caption": "F1-score calculation formula", } ], mode="hybrid", ) logger.info(f"Answer: {equation_result}") except Exception as e: logger.error(f"Error processing with RAG: {str(e)}") import traceback logger.error(traceback.format_exc()) def main(): """Main function to run the example""" parser = argparse.ArgumentParser(description="MinerU RAG Example") parser.add_argument("file_path", help="Path to the document to process") parser.add_argument( "--working_dir", "-w", default="./rag_storage", help="Working directory path" ) parser.add_argument( "--output", "-o", default="./output", help="Output directory path" ) parser.add_argument( "--api-key", 
default=os.getenv("OPENAI_API_KEY"), help="OpenAI API key (defaults to OPENAI_API_KEY env var)", ) parser.add_argument("--base-url", help="Optional base URL for API") args = parser.parse_args() # Check if API key is provided if not args.api_key: logger.error("Error: OpenAI API key is required") logger.error("Set OPENAI_API_KEY environment variable or use --api-key option") return # Create output directory if specified if args.output: os.makedirs(args.output, exist_ok=True) # Process with RAG asyncio.run( process_with_rag( args.file_path, args.output, args.api_key, args.base_url, args.working_dir ) ) if __name__ == "__main__": # Configure logging first configure_logging() print("RAGAnything Example") print("=" * 30) print("Processing document with multimodal RAG pipeline") print("=" * 30) main() ================================================ FILE: examples/rerank_example.py ================================================ """ LightRAG Rerank Integration Example This example demonstrates how to use rerank functionality with LightRAG to improve retrieval quality across different query modes. Configuration Required: 1. Set your OpenAI LLM API key and base URL with env vars LLM_MODEL LLM_BINDING_HOST LLM_BINDING_API_KEY 2. Set your OpenAI embedding API key and base URL with env vars: EMBEDDING_MODEL EMBEDDING_DIM EMBEDDING_BINDING_HOST EMBEDDING_BINDING_API_KEY 3. 
async def create_rag_with_rerank():
    """Build and initialize a LightRAG instance wired to the module-level rerank function.

    The embedding dimension is not hard-coded: a single probe string is embedded
    and the second axis of the returned array is used as the dimension.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has completed.
    """
    # Probe the embedding backend once to discover the vector width.
    probe_vectors = await embedding_func(["test"])
    detected_dim = probe_vectors.shape[1]
    print(f"Detected embedding dimension: {detected_dim}")

    wrapped_embedding = EmbeddingFunc(
        embedding_dim=detected_dim,
        max_token_size=8192,
        func=embedding_func,
    )

    # Method 1: pass a custom rerank callable at construction time.
    rerank_rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=wrapped_embedding,
        rerank_model_func=rerank_model_func,
    )

    # Per the original comment, this call also initializes pipeline_status.
    await rerank_rag.initialize_storages()
    return rerank_rag
async def test_direct_rerank():
    """Call the module-level rerank function directly and print the ranked documents.

    Each result returned by ``rerank_model_func`` is read through its ``"index"``
    (position in the local ``documents`` list) and ``"relevance_score"`` keys.
    Any exception raised by the rerank call is caught and reported, so this demo
    step never aborts the rest of the example.
    """
    print("\n🔧 Direct Rerank API Test")
    print("=" * 40)

    documents = [
        "Vector search finds semantically similar documents",
        "LightRAG supports advanced reranking capabilities",
        "Reranking significantly improves retrieval quality",
        "Natural language processing with modern transformers",
        "The quick brown fox jumps over the lazy dog",
    ]

    query = "rerank improve quality"
    print(f"Query: '{query}'")
    print(f"Documents: {len(documents)}")

    try:
        reranked_results = await rerank_model_func(
            query=query,
            documents=documents,
            top_n=4,
        )

        print("\n✅ Rerank Results:")
        # enumerate() supplies the rank; the original kept a manual counter that
        # was never printed and showed the source index as if it were the rank.
        # `or []` tolerates a rerank function that returns None.
        for rank, result in enumerate(reranked_results or [], start=1):
            index = result["index"]
            score = result["relevance_score"]
            content = documents[index]
            print(f"  {rank}. [doc {index}] Score: {score:.4f} | {content}")
    except Exception as e:
        print(f"❌ Rerank failed: {e}")
async def copy_from_postgres_to_json():
    """Copy every LLM response cache entry from PostgreSQL into the JSON KV store.

    PostgreSQL keys are flattened as ``{mode}:{cache_type}:{hash}``; the JSON store
    receives a nested mapping ``{mode: {hash: cache_entry}}``. Keys that do not
    split into exactly three parts are reported and skipped.
    """
    await postgres_db.initdb()

    from_llm_response_cache = PGKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={"embedding_batch_num": 6},
        embedding_func=None,
        db=postgres_db,
    )

    to_llm_response_cache = JsonKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={"working_dir": WORKING_DIR},
        embedding_func=None,
    )

    # Get all cache data using the flattened structure
    all_data = await from_llm_response_cache.get_all()

    # Convert flattened data to the hierarchical structure for JsonKVStorage
    kv = {}
    for flattened_key, cache_entry in all_data.items():
        # Parse flattened key: {mode}:{cache_type}:{hash}
        parts = flattened_key.split(":", 2)
        if len(parts) != 3:
            print(f"Skipping invalid key format: {flattened_key}")
            continue

        mode, cache_type, hash_value = parts
        # The nested layout has no slot for cache_type, and the reverse copy
        # falls back to "extract" when the entry does not carry it. Keep the
        # parsed value on the entry (without mutating the source dict) so a
        # round trip does not relabel non-extract entries.
        if isinstance(cache_entry, dict) and "cache_type" not in cache_entry:
            cache_entry = {**cache_entry, "cache_type": cache_type}

        kv.setdefault(mode, {})[hash_value] = cache_entry
        print(f"Copying {flattened_key} -> {mode}[{hash_value}]")

    await to_llm_response_cache.upsert(kv)
    await to_llm_response_cache.index_done_callback()
    print("Mission accomplished!")
async def initialize_rag():
    """Create the Bedrock-backed LightRAG instance and initialize its storages.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has completed.
    """
    bedrock_embedding = EmbeddingFunc(
        embedding_dim=1024,
        max_token_size=8192,
        func=bedrock_embed,
    )
    bedrock_rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=bedrock_complete,
        llm_model_name="Anthropic Claude 3 Haiku // Amazon Bedrock",
        embedding_func=bedrock_embedding,
    )

    # Per the original comment, this call also initializes pipeline_status.
    await bedrock_rag.initialize_storages()
    return bedrock_rag
class CloudflareWorker:
    """Small client that exposes Cloudflare-hosted models as LightRAG callables.

    Args:
        cloudflare_api_key: Bearer token sent in the ``Authorization`` header.
        api_base_url: URL prefix; the model name is appended to it verbatim.
        llm_model_name: Model used by :meth:`query`.
        embedding_model_name: Model used by :meth:`embedding_chunk`.
        max_tokens: Sent as ``max_tokens`` in every request payload.
        max_response_tokens: Sent as ``response_token_limit`` in every payload.
        request_timeout: Seconds to wait for the HTTP response before failing.
    """

    def __init__(
        self,
        cloudflare_api_key: str,
        api_base_url: str,
        llm_model_name: str,
        embedding_model_name: str,
        max_tokens: int = 4080,
        max_response_tokens: int = 4080,
        request_timeout: float = 120.0,
    ):
        self.cloudflare_api_key = cloudflare_api_key
        self.api_base_url = api_base_url
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
        self.max_tokens = max_tokens
        self.max_response_tokens = max_response_tokens
        self.request_timeout = request_timeout

    async def _send_request(self, model_name: str, input_: dict, debug_log: str):
        """POST ``input_`` to ``api_base_url + model_name`` and unwrap the result.

        Returns:
            ``np.ndarray`` built from ``result["data"]`` (embedding case), the
            value of ``result["response"]`` (LLM case), or ``None`` when the
            request fails or the response has neither key.
        """
        headers = {"Authorization": f"Bearer {self.cloudflare_api_key}"}

        print(f"""
        data sent to Cloudflare
        ~~~~~~~~~~~
        {debug_log}
        """)

        try:
            # requests is synchronous: run it in a worker thread so the event
            # loop is not blocked, and bound the wait with a timeout so a
            # stalled connection cannot hang the pipeline forever.
            response = await asyncio.to_thread(
                requests.post,
                f"{self.api_base_url}{model_name}",
                headers=headers,
                json=input_,
                timeout=self.request_timeout,
            )
            response_raw = response.json()
            print(f"""
        Cloudflare worker responded with:
        ~~~~~~~~~~~
        {str(response_raw)}
        """)
            # `or {}` covers a body whose "result" is present but null.
            result = response_raw.get("result") or {}

            if "data" in result:  # Embedding case
                return np.array(result["data"])

            if "response" in result:  # LLM response
                return result["response"]

            raise ValueError("Unexpected Cloudflare response format")

        except Exception as e:
            print(f"""
        Cloudflare API returned:
        ~~~~~~~~~
        Error: {e}
        """)
            # Kept from the original demo: wait for the operator, then return None.
            input("Press Enter to continue...")
            return None

    async def query(self, prompt, system_prompt: str = "", **kwargs) -> str:
        """Send a chat request with ``system_prompt`` and ``prompt`` to the LLM model."""
        # since no caching is used and we don't want to mess with everything lightrag, pop the kwarg it is
        kwargs.pop("hashing_kv", None)

        message = [
            # A None system prompt is sent as an empty string, not a null content field.
            {"role": "system", "content": system_prompt or ""},
            {"role": "user", "content": prompt},
        ]

        input_ = {
            "messages": message,
            "max_tokens": self.max_tokens,
            "response_token_limit": self.max_response_tokens,
        }

        return await self._send_request(
            self.llm_model_name,
            input_,
            debug_log=f"\n- model used {self.llm_model_name}\n- system prompt: {system_prompt}\n- query: {prompt}",
        )

    async def embedding_chunk(self, texts: list[str]) -> np.ndarray:
        """Send ``texts`` to the embedding model and return whatever ``_send_request`` yields."""
        print(f"""
        TEXT inputted
        ~~~~~
        {texts}
        """)

        input_ = {
            "text": texts,
            "max_tokens": self.max_tokens,
            "response_token_limit": self.max_response_tokens,
        }

        return await self._send_request(
            self.embedding_model_name,
            input_,
            debug_log=f"\n-llm model name {self.embedding_model_name}\n- texts: {texts}",
        )
async def initialize_rag():
    """Create a LightRAG instance backed by a CloudflareWorker and initialize its storages.

    The embedding dimension and maximum embedding token size are read from the
    ``EMBEDDING_DIM`` and ``MAX_EMBED_TOKENS`` environment variables, defaulting
    to 1024 and 2048.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has completed.
    """
    worker = CloudflareWorker(
        cloudflare_api_key=cloudflare_api_key,
        api_base_url=api_base_url,
        embedding_model_name=EMBEDDING_MODEL,
        llm_model_name=LLM_MODEL,
    )

    model_label = os.getenv("LLM_MODEL", LLM_MODEL)
    embed_dim = int(os.getenv("EMBEDDING_DIM", "1024"))
    embed_max_tokens = int(os.getenv("MAX_EMBED_TOKENS", "2048"))

    worker_embedding = EmbeddingFunc(
        embedding_dim=embed_dim,
        max_token_size=embed_max_tokens,
        func=lambda texts: worker.embedding_chunk(texts),
    )

    cloudflare_rag = LightRAG(
        working_dir=WORKING_DIR,
        max_parallel_insert=2,
        llm_model_func=worker.query,
        llm_model_name=model_label,
        summary_max_tokens=4080,
        embedding_func=worker_embedding,
    )

    # Per the original comment, this call also initializes pipeline_status.
    await cloudflare_rag.initialize_storages()
    return cloudflare_rag
async def main():
    """Run the Cloudflare demo end to end.

    Steps: delete stale data files from ``WORKING_DIR``, build the RAG instance,
    probe the embedding function, insert ``./book.txt``, then run the same
    question in naive, local, global and hybrid mode with streaming enabled.
    Any exception is printed, and storages are finalized if the RAG instance
    was created.
    """
    # Bind the name before the try block: the finally clause reads it, and it
    # would otherwise be unbound if anything fails before initialize_rag returns.
    rag = None
    try:
        # Clear old data files
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]

        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"Deleting old file:: {file_path}")

        # Initialize RAG instance
        rag = await initialize_rag()

        # Test embedding function
        test_text = ["This is a test string for embedding."]
        embedding = await rag.embedding_func(test_text)
        embedding_dim = embedding.shape[1]
        print("\n=======================")
        print("Test embedding function")
        print("========================")
        print(f"Test dict: {test_text}")
        print(f"Detected embedding dimension: {embedding_dim}\n\n")

        # Load the document to add to the knowledge base.
        # Several documents can be added by extending this section.
        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # The four query sections were identical apart from the mode name.
        question = "What are the top themes in this story?"
        for mode in ("naive", "local", "global", "hybrid"):
            print("\n=====================")
            print(f"Query mode: {mode}")
            print("=====================")
            resp = await rag.aquery(
                question,
                param=QueryParam(mode=mode, stream=True),
            )
            if inspect.isasyncgen(resp):
                await print_stream(resp)
            else:
                print(resp)

        # FOR TESTING: uncomment this part to chat interactively right after building.
        #
        # print("\n" + "=" * 60)
        # print("AI ASSISTANT READY!")
        # print("Ask questions about (your uploaded) regulations")
        # print("Type 'quit' to exit")
        # print("=" * 60)
        #
        # while True:
        #     question = input("\n🔥 Your question: ")
        #
        #     if question.lower() in ['quit', 'exit', 'bye']:
        #         break
        #
        #     print("\nThinking...")
        #     response = await rag.aquery(question, param=QueryParam(mode="hybrid"))
        #     print(f"\nAnswer: {response}")

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag:
            await rag.llm_response_cache.index_done_callback()
            await rag.finalize_storages()
def main():
    """Index ``./book.txt`` and print the answer to one question in every query mode."""
    rag = asyncio.run(initialize_rag())

    with open("./book.txt", "r", encoding="utf-8") as book:
        rag.insert(book.read())

    # Same order as before: naive, local, global, hybrid.
    question = "What are the top themes in this story?"
    for search_mode in ("naive", "local", "global", "hybrid"):
        answer = rag.query(question, param=QueryParam(mode=search_mode))
        print(answer)
async def embedding_func(texts):
    """Embed ``texts`` with an OpenAI embedding model through LlamaIndex.

    A new ``OpenAIEmbedding`` is constructed on each call from the module-level
    ``EMBEDDING_MODEL`` and ``OPENAI_API_KEY``. Failures are printed and then
    re-raised unchanged.
    """
    try:
        return await llama_index_embed(
            texts,
            embed_model=OpenAIEmbedding(
                model=EMBEDDING_MODEL,
                api_key=OPENAI_API_KEY,
            ),
        )
    except Exception as exc:
        print(f"Embedding failed: {str(exc)}")
        raise
async def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
    """Complete ``prompt`` through a LiteLLM proxy using the LlamaIndex adapter.

    Args:
        prompt: User prompt text.
        system_prompt: Optional system prompt forwarded to the adapter.
        history_messages: Optional prior messages; ``None`` means no history.
        **kwargs: Only ``llm_instance`` is consulted; when it is absent a
            ``LiteLLM`` client is built from the module-level configuration.

    Returns:
        Whatever ``llama_index_complete_if_cache`` returns.

    Raises:
        Re-raises any exception after printing it.
    """
    # A fresh list per call replaces the shared mutable default `[]`.
    if history_messages is None:
        history_messages = []

    try:
        # Initialize LiteLLM if not in kwargs
        if "llm_instance" not in kwargs:
            llm_instance = LiteLLM(
                model=f"openai/{LLM_MODEL}",  # Format: "provider/model_name"
                api_base=LITELLM_URL,
                api_key=LITELLM_KEY,
                temperature=0.7,
            )
            kwargs["llm_instance"] = llm_instance

        response = await llama_index_complete_if_cache(
            kwargs["llm_instance"],
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
        )
        return response
    except Exception as e:
        print(f"LLM request failed: {str(e)}")
        raise
def main():
    """Index ``./book.txt`` and print a labelled answer for each query mode."""
    # Initialize RAG instance
    rag = asyncio.run(initialize_rag())

    # Insert example text
    with open("./book.txt", "r", encoding="utf-8") as book:
        rag.insert(book.read())

    # Test different query modes, in the original order and with the original headings
    question = "What are the top themes in this story?"
    sections = (
        ("\nNaive Search:", "naive"),
        ("\nLocal Search:", "local"),
        ("\nGlobal Search:", "global"),
        ("\nHybrid Search:", "hybrid"),
    )
    for heading, search_mode in sections:
        print(heading)
        print(rag.query(question, param=QueryParam(mode=search_mode)))
# Configure working directory
WORKING_DIR = "./index_default"
print(f"WORKING_DIR: {WORKING_DIR}")

# Model configuration
LLM_MODEL = os.environ.get("LLM_MODEL", "gemma-3-4b")
print(f"LLM_MODEL: {LLM_MODEL}")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "arctic-embed")
print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")

# LiteLLM configuration
LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
print(f"LITELLM_URL: {LITELLM_URL}")
# Placeholder default only: a realistic-looking key must not be committed as a
# fallback. Supply the real key through the LITELLM_KEY environment variable.
LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234")

# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(WORKING_DIR, exist_ok=True)
async def initialize_rag():
    """Create the LightRAG instance for this demo and initialize its storages.

    The embedding dimension comes from ``get_embedding_dim()`` rather than a
    constant.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has completed.
    """
    detected_dim = await get_embedding_dim()

    wrapped_embedding = EmbeddingFunc(
        embedding_dim=detected_dim,
        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
        func=embedding_func,
    )
    demo_rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=wrapped_embedding,
    )

    # Per the original comment, this call also initializes pipeline_status.
    await demo_rag.initialize_storages()
    return demo_rag
async def initialize_rag():
    """Create the lmdeploy-backed LightRAG instance and initialize its storages.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has completed.
    """
    # Load the tokenizer and embedding model once. The original built them
    # inside the embedding lambda, re-running from_pretrained() on every call.
    embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
    embed_model = AutoModel.from_pretrained(embed_model_name)

    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=lmdeploy_model_complete,
        llm_model_name="meta-llama/Llama-3.1-8B-Instruct",  # please use definite path for local model
        embedding_func=EmbeddingFunc(
            embedding_dim=384,
            max_token_size=5000,
            func=lambda texts: hf_embed(
                texts,
                tokenizer=tokenizer,
                embed_model=embed_model,
            ),
        ),
    )

    await rag.initialize_storages()  # Auto-initializes pipeline_status
    return rag
================================================ import os import asyncio import nest_asyncio from lightrag import LightRAG, QueryParam from lightrag.llm import ( openai_complete_if_cache, nvidia_openai_embed, ) from lightrag.utils import EmbeddingFunc import numpy as np # for custom llm_model_func from lightrag.utils import locate_json_string_body_from_string nest_asyncio.apply() WORKING_DIR = "./dickens" if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) # some method to use your API key (choose one) # NVIDIA_OPENAI_API_KEY = os.getenv("NVIDIA_OPENAI_API_KEY") NVIDIA_OPENAI_API_KEY = "nvapi-xxxx" # your api key # using pre-defined function for nvidia LLM API. OpenAI compatible # llm_model_func = nvidia_openai_complete # If you trying to make custom llm_model_func to use llm model on NVIDIA API like other example: async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: result = await openai_complete_if_cache( "nvidia/llama-3.1-nemotron-70b-instruct", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key=NVIDIA_OPENAI_API_KEY, base_url="https://integrate.api.nvidia.com/v1", **kwargs, ) if keyword_extraction: return locate_json_string_body_from_string(result) return result # custom embedding nvidia_embed_model = "nvidia/nv-embedqa-e5-v5" async def indexing_embedding_func(texts: list[str]) -> np.ndarray: return await nvidia_openai_embed( texts, model=nvidia_embed_model, # maximum 512 token # model="nvidia/llama-3.2-nv-embedqa-1b-v1", api_key=NVIDIA_OPENAI_API_KEY, base_url="https://integrate.api.nvidia.com/v1", input_type="passage", trunc="END", # handling on server side if input token is longer than maximum token encode="float", ) async def query_embedding_func(texts: list[str]) -> np.ndarray: return await nvidia_openai_embed( texts, model=nvidia_embed_model, # maximum 512 token # model="nvidia/llama-3.2-nv-embedqa-1b-v1", api_key=NVIDIA_OPENAI_API_KEY, 
base_url="https://integrate.api.nvidia.com/v1", input_type="query", trunc="END", # handling on server side if input token is longer than maximum token encode="float", ) # dimension are same async def get_embedding_dim(): test_text = ["This is a test sentence."] embedding = await indexing_embedding_func(test_text) embedding_dim = embedding.shape[1] return embedding_dim # function test async def test_funcs(): result = await llm_model_func("How are you?") print("llm_model_func: ", result) result = await indexing_embedding_func(["How are you?"]) print("embedding_func: ", result) # asyncio.run(test_funcs()) async def initialize_rag(): embedding_dimension = await get_embedding_dim() print(f"Detected embedding dimension: {embedding_dimension}") # lightRAG class during indexing rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, # llm_model_name="meta/llama3-70b-instruct", #un comment if embedding_func=EmbeddingFunc( embedding_dim=embedding_dimension, max_token_size=512, # maximum token size, somehow it's still exceed maximum number of token # so truncate (trunc) parameter on embedding_func will handle it and try to examine the tokenizer used in LightRAG # so you can adjust to be able to fit the NVIDIA model (future work) func=indexing_embedding_func, ), ) await rag.initialize_storages() # Auto-initializes pipeline_status return rag async def main(): try: # Initialize RAG instance rag = await initialize_rag() # reading file with open("./book.txt", "r", encoding="utf-8") as f: await rag.ainsert(f.read()) # Perform naive search print("==============Naive===============") print( await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="naive") ) ) # Perform local search print("==============local===============") print( await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="local") ) ) # Perform global search print("==============global===============") print( await rag.aquery( "What are the top themes 
in this story?", param=QueryParam(mode="global"), ) ) # Perform hybrid search print("==============hybrid===============") print( await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="hybrid"), ) ) except Exception as e: print(f"An error occurred: {e}") if __name__ == "__main__": asyncio.run(main()) ================================================ FILE: examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py ================================================ import os import asyncio from lightrag import LightRAG, QueryParam from lightrag.llm.ollama import ollama_embed, openai_complete_if_cache from lightrag.utils import EmbeddingFunc # WorkingDir ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) WORKING_DIR = os.path.join(ROOT_DIR, "myKG") if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) print(f"WorkingDir: {WORKING_DIR}") # redis os.environ["REDIS_URI"] = "redis://localhost:6379" # neo4j BATCH_SIZE_NODES = 500 BATCH_SIZE_EDGES = 100 os.environ["NEO4J_URI"] = "neo4j://localhost:7687" os.environ["NEO4J_USERNAME"] = "neo4j" os.environ["NEO4J_PASSWORD"] = "12345678" # milvus os.environ["MILVUS_URI"] = "http://localhost:19530" os.environ["MILVUS_USER"] = "root" os.environ["MILVUS_PASSWORD"] = "Milvus" os.environ["MILVUS_DB_NAME"] = "lightrag" async def llm_model_func( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: return await openai_complete_if_cache( "deepseek-chat", prompt, system_prompt=system_prompt, history_messages=history_messages, api_key="", base_url="", **kwargs, ) embedding_func = EmbeddingFunc( embedding_dim=768, max_token_size=512, func=lambda texts: ollama_embed( texts, embed_model="shaw/dmeta-embedding-zh", host="http://117.50.173.35:11434" ), ) async def initialize_rag(): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, summary_max_tokens=10000, embedding_func=embedding_func, chunk_token_size=512, chunk_overlap_token_size=256, 
kv_storage="RedisKVStorage", graph_storage="Neo4JStorage", vector_storage="MilvusVectorDBStorage", doc_status_storage="RedisKVStorage", ) await rag.initialize_storages() # Auto-initializes pipeline_status return rag def main(): # Initialize RAG instance rag = asyncio.run(initialize_rag()) with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) # Perform naive search print( rag.query( "What are the top themes in this story?", param=QueryParam(mode="naive") ) ) # Perform local search print( rag.query( "What are the top themes in this story?", param=QueryParam(mode="local") ) ) # Perform global search print( rag.query( "What are the top themes in this story?", param=QueryParam(mode="global") ) ) # Perform hybrid search print( rag.query( "What are the top themes in this story?", param=QueryParam(mode="hybrid") ) ) if __name__ == "__main__": main() ================================================ FILE: k8s-deploy/README-zh.md ================================================ # LightRAG Helm Chart 这是用于在Kubernetes集群上部署LightRAG服务的Helm chart。 LightRAG有两种推荐的部署方法: 1. **轻量级部署**:使用内置轻量级存储,适合测试和小规模使用 2. 
**生产环境部署**:使用外部数据库(如PostgreSQL和Neo4J),适合生产环境和大规模使用 > 如果您想要部署过程的视频演示,可以查看[bilibili](https://www.bilibili.com/video/BV1bUJazBEq2/)上的视频教程,对于喜欢视觉指导的用户可能会有所帮助。 ## 前提条件 确保安装和配置了以下工具: * **Kubernetes集群** * 需要一个运行中的Kubernetes集群。 * 对于本地开发或演示,可以使用[Minikube](https://minikube.sigs.k8s.io/docs/start/)(需要≥2个CPU,≥4GB内存,以及Docker/VM驱动支持)。 * 任何标准的云端或本地Kubernetes集群(EKS、GKE、AKS等)也可以使用。 * **kubectl** * Kubernetes命令行工具,用于管理集群。 * 按照官方指南安装:[安装和设置kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)。 * **Helm**(v3.x+) * Kubernetes包管理器,用于安装LightRAG。 * 通过官方指南安装:[安装Helm](https://helm.sh/docs/intro/install/)。 ## 轻量级部署(无需外部数据库) 这种部署选项使用内置的轻量级存储组件,非常适合测试、演示或小规模使用场景。无需外部数据库配置。 您可以使用提供的便捷脚本或直接使用Helm命令部署LightRAG。两种方法都配置了`lightrag/values.yaml`文件中定义的相同环境变量。 ### 使用便捷脚本(推荐): ```bash export OPENAI_API_BASE=<您的OPENAI_API_BASE> export OPENAI_API_KEY=<您的OPENAI_API_KEY> bash ./install_lightrag_dev.sh ``` ### 或直接使用Helm: ```bash # 您可以覆盖任何想要的环境参数 helm upgrade --install lightrag ./lightrag \ --namespace rag \ --set-string env.LIGHTRAG_KV_STORAGE=JsonKVStorage \ --set-string env.LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage \ --set-string env.LIGHTRAG_GRAPH_STORAGE=NetworkXStorage \ --set-string env.LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage \ --set-string env.LLM_BINDING=openai \ --set-string env.LLM_MODEL=gpt-4o-mini \ --set-string env.LLM_BINDING_HOST=$OPENAI_API_BASE \ --set-string env.LLM_BINDING_API_KEY=$OPENAI_API_KEY \ --set-string env.EMBEDDING_BINDING=openai \ --set-string env.EMBEDDING_MODEL=text-embedding-ada-002 \ --set-string env.EMBEDDING_DIM=1536 \ --set-string env.EMBEDDING_BINDING_API_KEY=$OPENAI_API_KEY ``` ### 访问应用程序: ```bash # 1. 在终端中运行此端口转发命令: kubectl --namespace rag port-forward svc/lightrag-dev 9621:9621 # 2. 当命令运行时,打开浏览器并导航到: # http://localhost:9621 ``` ## 生产环境部署(使用外部数据库) ### 1. 
安装数据库 > 如果您已经准备好了数据库,可以跳过此步骤。详细信息可以在:[README.md](databases%2FREADME.md)中找到。 我们推荐使用KubeBlocks进行数据库部署。KubeBlocks是一个云原生数据库操作符,可以轻松地在Kubernetes上以生产规模运行任何数据库。 首先,安装KubeBlocks和KubeBlocks-Addons(如已安装可跳过): ```bash bash ./databases/01-prepare.sh ``` 然后安装所需的数据库。默认情况下,这将安装PostgreSQL和Neo4J,但您可以修改[00-config.sh](databases%2F00-config.sh)以根据需要选择不同的数据库: ```bash bash ./databases/02-install-database.sh ``` 验证集群是否正在运行: ```bash kubectl get clusters -n rag # 预期输出: # NAME CLUSTER-DEFINITION TERMINATION-POLICY STATUS AGE # neo4j-cluster Delete Running 39s # pg-cluster postgresql Delete Running 42s kubectl get po -n rag # 预期输出: # NAME READY STATUS RESTARTS AGE # neo4j-cluster-neo4j-0 1/1 Running 0 58s # pg-cluster-postgresql-0 4/4 Running 0 59s # pg-cluster-postgresql-1 4/4 Running 0 59s ``` ### 2. 安装LightRAG LightRAG及其数据库部署在同一Kubernetes集群中,使配置变得简单。 安装脚本会自动从KubeBlocks获取所有数据库连接信息,无需手动设置数据库凭证: ```bash export OPENAI_API_BASE=<您的OPENAI_API_BASE> export OPENAI_API_KEY=<您的OPENAI_API_KEY> bash ./install_lightrag.sh ``` ### 访问应用程序: ```bash # 1. 在终端中运行此端口转发命令: kubectl --namespace rag port-forward svc/lightrag 9621:9621 # 2. 
当命令运行时,打开浏览器并导航到: # http://localhost:9621 ``` ## 配置 ### 修改资源配置 您可以通过修改`values.yaml`文件来配置LightRAG的资源使用: ```yaml replicaCount: 1 # 副本数量,可根据需要增加 resources: limits: cpu: 1000m # CPU限制,可根据需要调整 memory: 2Gi # 内存限制,可根据需要调整 requests: cpu: 500m # CPU请求,可根据需要调整 memory: 1Gi # 内存请求,可根据需要调整 ``` ### 修改持久存储 ```yaml persistence: enabled: true ragStorage: size: 10Gi # RAG存储大小,可根据需要调整 inputs: size: 5Gi # 输入数据存储大小,可根据需要调整 ``` ### 配置环境变量 `values.yaml`文件中的`env`部分包含LightRAG的所有环境配置,类似于`.env`文件。当使用helm upgrade或helm install命令时,可以使用--set标志覆盖这些变量。 ```yaml env: HOST: 0.0.0.0 PORT: 9621 WEBUI_TITLE: Graph RAG Engine WEBUI_DESCRIPTION: Simple and Fast Graph Based RAG System # LLM配置 LLM_BINDING: openai # LLM服务提供商 LLM_MODEL: gpt-4o-mini # LLM模型 LLM_BINDING_HOST: # API基础URL(可选) LLM_BINDING_API_KEY: # API密钥 # 嵌入配置 EMBEDDING_BINDING: openai # 嵌入服务提供商 EMBEDDING_MODEL: text-embedding-ada-002 # 嵌入模型 EMBEDDING_DIM: 1536 # 嵌入维度 EMBEDDING_BINDING_API_KEY: # API密钥 # 存储配置 LIGHTRAG_KV_STORAGE: PGKVStorage # 键值存储类型 LIGHTRAG_VECTOR_STORAGE: PGVectorStorage # 向量存储类型 LIGHTRAG_GRAPH_STORAGE: Neo4JStorage # 图存储类型 LIGHTRAG_DOC_STATUS_STORAGE: PGDocStatusStorage # 文档状态存储类型 ``` ## 注意事项 - 在部署前确保设置了所有必要的环境变量(API密钥和数据库密码) - 出于安全原因,建议使用环境变量传递敏感信息,而不是直接写入脚本或values文件 - 轻量级部署适合测试和小规模使用,但数据持久性和性能可能有限 - 生产环境部署(PostgreSQL + Neo4J)推荐用于生产环境和大规模使用 - 有关更多自定义配置,请参考LightRAG官方文档 ================================================ FILE: k8s-deploy/README.md ================================================ # LightRAG Helm Chart This is the Helm chart for LightRAG, used to deploy LightRAG services on a Kubernetes cluster. There are two recommended deployment methods for LightRAG: 1. **Lightweight Deployment**: Using built-in lightweight storage, suitable for testing and small-scale usage 2. 
**Production Deployment**: Using external databases (such as PostgreSQL and Neo4J), suitable for production environments and large-scale usage > If you'd like a video walkthrough of the deployment process, feel free to check out this optional [video tutorial](https://youtu.be/JW1z7fzeKTw?si=vPzukqqwmdzq9Q4q) on YouTube. It might help clarify some steps for those who prefer visual guidance. ## Prerequisites Make sure the following tools are installed and configured: * **Kubernetes cluster** * A running Kubernetes cluster is required. * For local development or demos you can use [Minikube](https://minikube.sigs.k8s.io/docs/start/) (needs ≥ 2 CPUs, ≥ 4 GB RAM, and Docker/VM-driver support). * Any standard cloud or on-premises Kubernetes cluster (EKS, GKE, AKS, etc.) also works. * **kubectl** * The Kubernetes command-line tool for managing your cluster. * Follow the official guide: [Install and Set Up kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl). * **Helm** (v3.x+) * Kubernetes package manager used to install LightRAG. * Install it via the official instructions: [Installing Helm](https://helm.sh/docs/intro/install/). ## Lightweight Deployment (No External Databases Required) This deployment option uses built-in lightweight storage components that are perfect for testing, demos, or small-scale usage scenarios. No external database configuration is required. You can deploy LightRAG using either the provided convenience script or direct Helm commands. Both methods configure the same environment variables defined in the `lightrag/values.yaml` file. 
### Using the convenience script (recommended): ```bash export OPENAI_API_BASE=<YOUR_OPENAI_API_BASE> export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY> bash ./install_lightrag_dev.sh ``` ### Or using Helm directly: ```bash # You can override any env param you want helm upgrade --install lightrag ./lightrag \ --namespace rag \ --set-string env.LIGHTRAG_KV_STORAGE=JsonKVStorage \ --set-string env.LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage \ --set-string env.LIGHTRAG_GRAPH_STORAGE=NetworkXStorage \ --set-string env.LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage \ --set-string env.LLM_BINDING=openai \ --set-string env.LLM_MODEL=gpt-4o-mini \ --set-string env.LLM_BINDING_HOST=$OPENAI_API_BASE \ --set-string env.LLM_BINDING_API_KEY=$OPENAI_API_KEY \ --set-string env.EMBEDDING_BINDING=openai \ --set-string env.EMBEDDING_MODEL=text-embedding-ada-002 \ --set-string env.EMBEDDING_DIM=1536 \ --set-string env.EMBEDDING_BINDING_API_KEY=$OPENAI_API_KEY ``` ### Accessing the application: ```bash # 1. Run this port-forward command in your terminal: kubectl --namespace rag port-forward svc/lightrag-dev 9621:9621 # 2. While the command is running, open your browser and navigate to: # http://localhost:9621 ``` ## Production Deployment (Using External Databases) ### 1. Install Databases > You can skip this step if you've already prepared databases. Detailed information can be found in: [README.md](databases%2FREADME.md). We recommend KubeBlocks for database deployment. KubeBlocks is a cloud-native database operator that makes it easy to run any database on Kubernetes at production scale. First, install KubeBlocks and KubeBlocks-Addons (skip if already installed): ```bash bash ./databases/01-prepare.sh ``` Then install the required databases.
By default, this will install PostgreSQL and Neo4J, but you can modify [00-config.sh](databases%2F00-config.sh) to select different databases based on your needs: ```bash bash ./databases/02-install-database.sh ``` Verify that the clusters are up and running: ```bash kubectl get clusters -n rag # Expected output: # NAME CLUSTER-DEFINITION TERMINATION-POLICY STATUS AGE # neo4j-cluster Delete Running 39s # pg-cluster postgresql Delete Running 42s kubectl get po -n rag # Expected output: # NAME READY STATUS RESTARTS AGE # neo4j-cluster-neo4j-0 1/1 Running 0 58s # pg-cluster-postgresql-0 4/4 Running 0 59s # pg-cluster-postgresql-1 4/4 Running 0 59s ``` ### 2. Install LightRAG LightRAG and its databases are deployed within the same Kubernetes cluster, making configuration straightforward. The installation script automatically retrieves all database connection information from KubeBlocks, eliminating the need to manually set database credentials: ```bash export OPENAI_API_BASE=<YOUR_OPENAI_API_BASE> export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY> bash ./install_lightrag.sh ``` ### Accessing the application: ```bash # 1. Run this port-forward command in your terminal: kubectl --namespace rag port-forward svc/lightrag 9621:9621 # 2.
While the command is running, open your browser and navigate to: # http://localhost:9621 ``` ## Configuration ### Modifying Resource Configuration You can configure LightRAG's resource usage by modifying the `values.yaml` file: ```yaml replicaCount: 1 # Number of replicas, can be increased as needed resources: limits: cpu: 1000m # CPU limit, can be adjusted as needed memory: 2Gi # Memory limit, can be adjusted as needed requests: cpu: 500m # CPU request, can be adjusted as needed memory: 1Gi # Memory request, can be adjusted as needed ``` ### Modifying Persistent Storage ```yaml persistence: enabled: true ragStorage: size: 10Gi # RAG storage size, can be adjusted as needed inputs: size: 5Gi # Input data storage size, can be adjusted as needed ``` ### Configuring Environment Variables The `env` section in the `values.yaml` file contains all environment configurations for LightRAG, similar to a `.env` file. When using helm upgrade or helm install commands, you can override these with the --set flag. 
```yaml env: HOST: 0.0.0.0 PORT: 9621 WEBUI_TITLE: Graph RAG Engine WEBUI_DESCRIPTION: Simple and Fast Graph Based RAG System # LLM Configuration LLM_BINDING: openai # LLM service provider LLM_MODEL: gpt-4o-mini # LLM model LLM_BINDING_HOST: # API base URL (optional) LLM_BINDING_API_KEY: # API key # Embedding Configuration EMBEDDING_BINDING: openai # Embedding service provider EMBEDDING_MODEL: text-embedding-ada-002 # Embedding model EMBEDDING_DIM: 1536 # Embedding dimension EMBEDDING_BINDING_API_KEY: # API key # Storage Configuration LIGHTRAG_KV_STORAGE: PGKVStorage # Key-value storage type LIGHTRAG_VECTOR_STORAGE: PGVectorStorage # Vector storage type LIGHTRAG_GRAPH_STORAGE: Neo4JStorage # Graph storage type LIGHTRAG_DOC_STATUS_STORAGE: PGDocStatusStorage # Document status storage type ``` ## Notes - Ensure all necessary environment variables (API keys and database passwords) are set before deployment - For security reasons, it's recommended to pass sensitive information using environment variables rather than writing them directly in scripts or values files - Lightweight deployment is suitable for testing and small-scale usage, but data persistence and performance may be limited - Production deployment (PostgreSQL + Neo4J) is recommended for production environments and large-scale usage - For more customized configurations, please refer to the official LightRAG documentation ================================================ FILE: k8s-deploy/databases/00-config.sh ================================================ #!/bin/bash # Get the directory where this script is located DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" source "$DATABASE_SCRIPT_DIR/scripts/common.sh" # Namespace configuration NAMESPACE="rag" # version KB_VERSION="1.0.0-beta.48" ADDON_CLUSTER_CHART_VERSION="1.0.0-alpha.0" # Helm repository HELM_REPO="https://apecloud.github.io/helm-charts" # Set to true to enable the database, false to disable 
ENABLE_POSTGRESQL=true
ENABLE_REDIS=false
ENABLE_QDRANT=false
ENABLE_NEO4J=true
ENABLE_ELASTICSEARCH=false
ENABLE_MONGODB=false

================================================
FILE: k8s-deploy/databases/01-prepare.sh
================================================
#!/bin/bash
# Installs KubeBlocks (through install-kubeblocks.sh) and the KubeBlocks addon
# chart for every database enabled in 00-config.sh. It does not create clusters.

# Get the directory where this script is located
DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# Load configuration file
source "$DATABASE_SCRIPT_DIR/00-config.sh"

check_dependencies

# Check if KubeBlocks is already installed, install it if it is not.
source "$DATABASE_SCRIPT_DIR/install-kubeblocks.sh"

# Create namespaces (errors such as "already exists" are ignored on purpose)
print "Creating namespaces..."
kubectl create namespace "$NAMESPACE" 2>/dev/null || true

# Install database addons
print "Installing KubeBlocks database addons..."

# Add and update Helm repository
print "Adding and updating KubeBlocks Helm repository..."
helm repo add kubeblocks "$HELM_REPO"
helm repo update

# Install database addons based on configuration
[ "$ENABLE_POSTGRESQL" = true ] && print "Installing PostgreSQL addon..." && helm upgrade --install kb-addon-postgresql kubeblocks/postgresql --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_REDIS" = true ] && print "Installing Redis addon..." && helm upgrade --install kb-addon-redis kubeblocks/redis --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_ELASTICSEARCH" = true ] && print "Installing Elasticsearch addon..." && helm upgrade --install kb-addon-elasticsearch kubeblocks/elasticsearch --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_QDRANT" = true ] && print "Installing Qdrant addon..." && helm upgrade --install kb-addon-qdrant kubeblocks/qdrant --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_MONGODB" = true ] && print "Installing MongoDB addon..." && helm upgrade --install kb-addon-mongodb kubeblocks/mongodb --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_NEO4J" = true ] && print "Installing Neo4j addon..." && helm upgrade --install kb-addon-neo4j kubeblocks/neo4j --namespace kb-system --version "$ADDON_CLUSTER_CHART_VERSION"

print_success "KubeBlocks database addons installation completed!"
print "Now you can run 02-install-database.sh to install database clusters"

================================================
FILE: k8s-deploy/databases/02-install-database.sh
================================================
#!/bin/bash
# Installs a cluster for every database enabled in 00-config.sh, then waits
# (up to TIMEOUT seconds) until the pods of all enabled clusters are ready.

# Get the directory where this script is located
DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Load configuration file
source "$DATABASE_SCRIPT_DIR/00-config.sh"

print "Installing database clusters..."

# Install database clusters based on configuration
[ "$ENABLE_POSTGRESQL" = true ] && print "Installing PostgreSQL cluster..." && helm upgrade --install pg-cluster kubeblocks/postgresql-cluster -f "$DATABASE_SCRIPT_DIR/postgresql/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_REDIS" = true ] && print "Installing Redis cluster..." && helm upgrade --install redis-cluster kubeblocks/redis-cluster -f "$DATABASE_SCRIPT_DIR/redis/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_ELASTICSEARCH" = true ] && print "Installing Elasticsearch cluster..." && helm upgrade --install es-cluster kubeblocks/elasticsearch-cluster -f "$DATABASE_SCRIPT_DIR/elasticsearch/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_QDRANT" = true ] && print "Installing Qdrant cluster..." && helm upgrade --install qdrant-cluster kubeblocks/qdrant-cluster -f "$DATABASE_SCRIPT_DIR/qdrant/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_MONGODB" = true ] && print "Installing MongoDB cluster..." && helm upgrade --install mongodb-cluster kubeblocks/mongodb-cluster -f "$DATABASE_SCRIPT_DIR/mongodb/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"
[ "$ENABLE_NEO4J" = true ] && print "Installing Neo4j cluster..." && helm upgrade --install neo4j-cluster kubeblocks/neo4j-cluster -f "$DATABASE_SCRIPT_DIR/neo4j/values.yaml" --namespace "$NAMESPACE" --version "$ADDON_CLUSTER_CHART_VERSION"

# Wait for databases to be ready
print "Waiting for databases to be ready..."
TIMEOUT=600  # Set timeout to 10 minutes
START_TIME=$(date +%s)

# Helm release names of the enabled clusters. The list does not change while
# waiting, so it is built once here instead of on every loop iteration.
WAIT_INSTANCES=()
[ "$ENABLE_POSTGRESQL" = true ] && WAIT_INSTANCES+=("pg-cluster")
[ "$ENABLE_REDIS" = true ] && WAIT_INSTANCES+=("redis-cluster")
[ "$ENABLE_ELASTICSEARCH" = true ] && WAIT_INSTANCES+=("es-cluster")
[ "$ENABLE_QDRANT" = true ] && WAIT_INSTANCES+=("qdrant-cluster")
[ "$ENABLE_MONGODB" = true ] && WAIT_INSTANCES+=("mongodb-cluster")
[ "$ENABLE_NEO4J" = true ] && WAIT_INSTANCES+=("neo4j-cluster")

while true; do
    CURRENT_TIME=$(date +%s)
    ELAPSED=$((CURRENT_TIME - START_TIME))

    if [ "$ELAPSED" -gt "$TIMEOUT" ]; then
        print_error "Timeout waiting for databases to be ready. Please check database status manually and try again"
        exit 1
    fi

    # Check if all enabled databases are ready. kubectl is called directly
    # rather than through eval, so no command string is re-parsed by the shell.
    ALL_READY=true
    for INSTANCE in "${WAIT_INSTANCES[@]}"; do
        if ! kubectl wait --for=condition=ready pods -l "app.kubernetes.io/instance=$INSTANCE" -n "$NAMESPACE" --timeout=10s &> /dev/null; then
            ALL_READY=false
            break
        fi
    done

    if [ "$ALL_READY" = true ]; then
        print "All database pods are ready, continuing with deployment..."
        break
    fi

    print "Waiting for database pods to be ready (${ELAPSED}s elapsed)..."
    sleep 10
done

print_success "Database clusters installation completed!"
print "Use the following command to check the status of installed clusters:"
print "kubectl get clusters -n $NAMESPACE"

================================================
FILE: k8s-deploy/databases/03-uninstall-database.sh
================================================
#!/bin/bash
# Uninstalls the cluster of every database enabled in 00-config.sh.
# Failures of individual "helm uninstall" calls are ignored (|| true).

# Get the directory where this script is located
DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Load configuration file
source "$DATABASE_SCRIPT_DIR/00-config.sh"

print "Uninstalling database clusters..."

# Uninstall database clusters based on configuration
[ "$ENABLE_POSTGRESQL" = true ] && print "Uninstalling PostgreSQL cluster..." && helm uninstall pg-cluster --namespace "$NAMESPACE" 2>/dev/null || true
[ "$ENABLE_REDIS" = true ] && print "Uninstalling Redis cluster..." && helm uninstall redis-cluster --namespace "$NAMESPACE" 2>/dev/null || true
[ "$ENABLE_ELASTICSEARCH" = true ] && print "Uninstalling Elasticsearch cluster..." && helm uninstall es-cluster --namespace "$NAMESPACE" 2>/dev/null || true
[ "$ENABLE_QDRANT" = true ] && print "Uninstalling Qdrant cluster..." && helm uninstall qdrant-cluster --namespace "$NAMESPACE" 2>/dev/null || true
[ "$ENABLE_MONGODB" = true ] && print "Uninstalling MongoDB cluster..." && helm uninstall mongodb-cluster --namespace "$NAMESPACE" 2>/dev/null || true
[ "$ENABLE_NEO4J" = true ] && print "Uninstalling Neo4j cluster..." && helm uninstall neo4j-cluster --namespace "$NAMESPACE" 2>/dev/null || true

print_success "Database clusters uninstalled"
print "To uninstall database addons and KubeBlocks, run 04-cleanup.sh"

================================================
FILE: k8s-deploy/databases/04-cleanup.sh
================================================
#!/bin/bash
# Uninstalls the addon chart of every database enabled in 00-config.sh, runs
# uninstall-kubeblocks.sh, and deletes the $NAMESPACE and kb-system namespaces.

# Get the directory where this script is located
DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Load configuration file
source "$DATABASE_SCRIPT_DIR/00-config.sh"

print "Uninstalling KubeBlocks database addons..."

# Uninstall database addons based on configuration
[ "$ENABLE_POSTGRESQL" = true ] && print "Uninstalling PostgreSQL addon..." && helm uninstall kb-addon-postgresql --namespace kb-system 2>/dev/null || true
[ "$ENABLE_REDIS" = true ] && print "Uninstalling Redis addon..." && helm uninstall kb-addon-redis --namespace kb-system 2>/dev/null || true
[ "$ENABLE_ELASTICSEARCH" = true ] && print "Uninstalling Elasticsearch addon..." && helm uninstall kb-addon-elasticsearch --namespace kb-system 2>/dev/null || true
[ "$ENABLE_QDRANT" = true ] && print "Uninstalling Qdrant addon..." && helm uninstall kb-addon-qdrant --namespace kb-system 2>/dev/null || true
[ "$ENABLE_MONGODB" = true ] && print "Uninstalling MongoDB addon..." && helm uninstall kb-addon-mongodb --namespace kb-system 2>/dev/null || true
[ "$ENABLE_NEO4J" = true ] && print "Uninstalling Neo4j addon..." && helm uninstall kb-addon-neo4j --namespace kb-system 2>/dev/null || true

print_success "Database addons uninstallation completed!"

source "$DATABASE_SCRIPT_DIR/uninstall-kubeblocks.sh"

kubectl delete namespace "$NAMESPACE"
kubectl delete namespace kb-system

print_success "KubeBlocks uninstallation completed!"
================================================ FILE: k8s-deploy/databases/README.md ================================================ # Using KubeBlocks to Deploy and Manage Databases Learn how to quickly deploy and manage various databases in a Kubernetes (K8s) environment through KubeBlocks. ## Introduction to KubeBlocks KubeBlocks is a production-ready, open-source toolkit that runs any database--SQL, NoSQL, vector, or document--on Kubernetes. It scales smoothly from quick dev tests to full production clusters, making it a solid choice for RAG workloads like LightRAG that need several data stores working together. ## Prerequisites Make sure the following tools are installed and configured: * **Kubernetes cluster** * A running Kubernetes cluster is required. * For local development or demos you can use [Minikube](https://minikube.sigs.k8s.io/docs/start/) (needs ≥ 2 CPUs, ≥ 4 GB RAM, and Docker/VM-driver support). * Any standard cloud or on-premises Kubernetes cluster (EKS, GKE, AKS, etc.) also works. * **kubectl** * The Kubernetes command-line interface. * Follow the official guide: [Install and Set Up kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl). * **Helm** (v3.x+) * Kubernetes package manager used by the scripts below. * Install it via the official instructions: [Installing Helm](https://helm.sh/docs/intro/install/). ## Installing 1. **Configure the databases you want** Edit the `00-config.sh` file. Based on your requirements, set the variable to `true` for the databases you want to install. For example, to install PostgreSQL and Neo4j: ```bash ENABLE_POSTGRESQL=true ENABLE_REDIS=false ENABLE_ELASTICSEARCH=false ENABLE_QDRANT=false ENABLE_MONGODB=false ENABLE_NEO4J=true ``` 2.
**Prepare the environment and install KubeBlocks add-ons** ```bash bash ./01-prepare.sh ``` *What the script does* `01-prepare.sh` performs basic pre-checks (Helm, kubectl, cluster reachability), adds the KubeBlocks Helm repo, and installs any core CRDs or controllers that KubeBlocks itself needs. It also installs the addons for every database you enabled in `00-config.sh`, but **does not** create the actual database clusters yet. 3. **(Optional) Modify database settings** Before deployment you can edit the `values.yaml` file inside each `<database>/` directory (for example `postgresql/`) to change `version`, `replicas`, `CPU`, `memory`, `storage size`, etc. 4. **Install the database clusters** ```bash bash ./02-install-database.sh ``` *What the script does* `02-install-database.sh` **actually deploys the chosen databases to Kubernetes**. When the script completes, confirm that the clusters are up. It may take a few minutes for all the clusters to become ready, especially if this is the first time running the script as Kubernetes needs to pull container images from registries. You can monitor the progress using the following commands: ```bash kubectl get clusters -n rag NAME CLUSTER-DEFINITION TERMINATION-POLICY STATUS AGE es-cluster Delete Running 11m mongodb-cluster mongodb Delete Running 11m pg-cluster postgresql Delete Running 11m qdrant-cluster qdrant Delete Running 11m redis-cluster redis Delete Running 11m ``` You can see all the Database `Pods` created by KubeBlocks. Initially, you might see pods in `ContainerCreating` or `Pending` status - this is normal while images are being pulled and containers are starting up.
Wait until all pods show `Running` status: ```bash kubectl get po -n rag NAME READY STATUS RESTARTS AGE es-cluster-mdit-0 2/2 Running 0 11m mongodb-cluster-mongodb-0 2/2 Running 0 11m pg-cluster-postgresql-0 4/4 Running 0 11m pg-cluster-postgresql-1 4/4 Running 0 11m qdrant-cluster-qdrant-0 2/2 Running 0 11m redis-cluster-redis-0 2/2 Running 0 11m ``` You can also check the detailed status of a specific pod if it's taking longer than expected: ```bash kubectl describe pod <pod-name> -n rag ``` ## Connect to Databases To connect to your databases, follow these steps to identify available accounts, retrieve credentials, and establish connections: ### 1. List Available Database Clusters First, view the database clusters running in your namespace: ```bash kubectl get cluster -n rag ``` ### 2. Retrieve Authentication Credentials For PostgreSQL, retrieve the username and password from Kubernetes secrets: ```bash # Get PostgreSQL username kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.username}' | base64 -d # Get PostgreSQL password kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.password}' | base64 -d ``` If you have trouble finding the correct secret name, list all secrets: ```bash kubectl get secrets -n rag ``` ### 3. Port Forward to Local Machine Use port forwarding to access PostgreSQL from your local machine: ```bash # Forward PostgreSQL port (5432) to your local machine # You can see all services with: kubectl get svc -n rag kubectl port-forward -n rag svc/pg-cluster-postgresql-postgresql 5432:5432 ``` ### 4.
Connect Using Database Client Now you can connect using your preferred PostgreSQL client with the retrieved credentials: ```bash # Example: connecting with psql export PGUSER=$(kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.username}' | base64 -d) export PGPASSWORD=$(kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.password}' | base64 -d) psql -h localhost -p 5432 -U $PGUSER ``` Keep the port-forwarding terminal running while you're connecting to the database. ## Uninstalling 1. **Remove the database clusters** ```bash bash ./03-uninstall-database.sh ``` The script deletes the database clusters that were enabled in `00-config.sh`. 2. **Clean up KubeBlocks add-ons** ```bash bash ./04-cleanup.sh ``` This removes the addons installed by `01-prepare.sh`. ## Reference * [Kubeblocks Documentation](https://kubeblocks.io/docs/preview/user_docs/overview/introduction) ================================================ FILE: k8s-deploy/databases/elasticsearch/values.yaml ================================================ ## description: The version of ElasticSearch. ## default: 8.8.2 version: "8.8.2" ## description: Mode for ElasticSearch ## default: multi-node ## one of: [single-node, multi-node] mode: single-node ## description: The number of replicas, for single-node mode, the replicas is 1, for multi-node mode, the default replicas is 3. ## default: 1 ## minimum: 1 ## maximum: 5 replicas: 1 ## description: CPU cores. ## default: 1 ## minimum: 0.5 ## maximum: 64 cpu: 1 ## description: Memory, the unit is Gi. ## default: 2 ## minimum: 1 ## maximum: 1000 memory: 2 ## description: Storage size, the unit is Gi. 
## default: 20
## minimum: 1
## maximum: 10000
storage: 5

extra:
  terminationPolicy: Delete
  disableExporter: true

================================================
FILE: k8s-deploy/databases/install-kubeblocks.sh
================================================
#!/bin/bash

# Get the directory where this script is located
DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Load configuration file
source "$DATABASE_SCRIPT_DIR/00-config.sh"

# Check dependencies
check_dependencies

# Install KubeBlocks and its prerequisites into the kb-system namespace.
#
# Steps, in order: CSI snapshotter CRDs, the Piraeus snapshot-controller chart,
# the KubeBlocks CRDs, and finally the KubeBlocks chart itself.
# Reads KB_VERSION and HELM_REPO, which this script does not define itself
# (presumably provided by 00-config.sh).
# Returns non-zero when KubeBlocks does not become ready in time, so the
# script no longer reports success after a failed rollout.
install_kubeblocks() {
    local snapshotter_crd_base="https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.2.0/client/config/crd"
    local crd

    print "Ready to install KubeBlocks."

    # Install CSI Snapshotter CRDs
    for crd in volumesnapshotclasses volumesnapshots volumesnapshotcontents; do
        kubectl create -f "${snapshotter_crd_base}/snapshot.storage.k8s.io_${crd}.yaml"
    done

    # Add and update Piraeus repository
    helm repo add piraeus-charts https://piraeus.io/helm-charts/
    helm repo update

    # Install snapshot controller
    helm install snapshot-controller piraeus-charts/snapshot-controller -n kb-system --create-namespace
    if kubectl wait --for=condition=ready pods -l app.kubernetes.io/name=snapshot-controller -n kb-system --timeout=60s; then
        print_success "snapshot-controller installation complete!"
    else
        # Keep going as before, but do not claim success for a controller that is not ready.
        print_warning "snapshot-controller is not ready after 60s; continuing with the KubeBlocks installation."
    fi

    # Install KubeBlocks CRDs
    kubectl create -f "https://github.com/apecloud/kubeblocks/releases/download/v${KB_VERSION}/kubeblocks_crds.yaml"

    # Add and update KubeBlocks repository
    helm repo add kubeblocks "$HELM_REPO"
    helm repo update

    # Install KubeBlocks
    helm install kubeblocks kubeblocks/kubeblocks --namespace kb-system --create-namespace --version="${KB_VERSION}"

    # Verify installation
    print "Waiting for KubeBlocks to be ready..."
    if ! kubectl wait --for=condition=ready pods -l app.kubernetes.io/instance=kubeblocks -n kb-system --timeout=120s; then
        print_error "KubeBlocks did not become ready within 120s. Inspect it with: kubectl get pods -n kb-system"
        return 1
    fi
    print_success "KubeBlocks installation complete!"
}

# Check if KubeBlocks is already installed
print "Checking if KubeBlocks is already installed in kb-system namespace..."
if kubectl get namespace kb-system &>/dev/null && kubectl get deployment kubeblocks -n kb-system &>/dev/null; then
    print_success "KubeBlocks is already installed in kb-system namespace."
else
    # Call the function to install KubeBlocks
    install_kubeblocks
fi

================================================
FILE: k8s-deploy/databases/mongodb/values.yaml
================================================
## description: Cluster version.
## default: 6.0.16
## one of: [8.0.8, 8.0.6, 8.0.4, 7.0.19, 7.0.16, 7.0.12, 6.0.22, 6.0.20, 6.0.16, 5.0.30, 5.0.28, 4.4.29, 4.2.24, 4.0.28]
version: 6.0.16
## description: Cluster topology mode.
## default: standalone
## one of: [standalone, replicaset]
mode: standalone
## description: CPU cores.
## default: 0.5
## minimum: 0.5
## maximum: 64
cpu: 1
## description: Memory, the unit is Gi.
## default: 0.5
## minimum: 0.5
## maximum: 1000
memory: 1
## description: Storage size, the unit is Gi.
## default: 20
## minimum: 1
## maximum: 10000
storage: 20
## default: enabled
## one of: [enabled, disabled]
hostnetwork: "disabled"
extra:
  terminationPolicy: Delete

================================================
FILE: k8s-deploy/databases/neo4j/values.yaml
================================================
# Version
# description: Cluster version.
# default: 5.26.5
# one of: [5.26.5, 4.4.42]
version: 5.26.5
# Mode
# description: Cluster topology mode.
# default: singlealone
# one of: [singlealone]
mode: singlealone
# CPU
# description: CPU cores.
# default: 2
# minimum: 2
# maximum: 64
cpu: 2
# Memory(Gi)
# description: Memory, the unit is Gi.
# default: 2
# minimum: 2
# maximum: 1000
memory: 4
# Storage(Gi)
# description: Storage size, the unit is Gi.
# default: 20 # minimum: 1 # maximum: 10000 storage: 20 # Replicas # description: The number of replicas, for standalone mode, the replicas is 1, for replicaset mode, the default replicas is 3. # default: 1 # minimum: 1 # maximum: 5 replicas: 1 # Storage Class Name # description: Storage class name of the data volume storageClassName: "" extra: terminationPolicy: Delete ================================================ FILE: k8s-deploy/databases/postgresql/values.yaml ================================================ ## description: service version. ## default: 15.7.0 version: 16.4.0 ## mode postgresql cluster topology mode replication mode: replication ## description: The number of replicas, for standalone mode, the replicas is 1, for replication mode, the default replicas is 2. ## default: 1 ## minimum: 1 ## maximum: 5 replicas: 2 ## description: CPU cores. ## default: 0.5 ## minimum: 0.5 ## maximum: 64 cpu: 1 ## description: Memory, the unit is Gi. ## default: 0.5 ## minimum: 0.5 ## maximum: 1000 memory: 1 ## description: Storage size, the unit is Gi. ## default: 20 ## minimum: 1 ## maximum: 10000 storage: 5 ## terminationPolicy define Cluster termination policy. One of DoNotTerminate, Delete, WipeOut. terminationPolicy: Delete ================================================ FILE: k8s-deploy/databases/qdrant/values.yaml ================================================ ## description: The version of Qdrant. ## default: 1.10.0 version: 1.10.0 ## description: The number of replicas. ## default: 1 ## minimum: 1 ## maximum: 16 replicas: 1 ## description: CPU cores. ## default: 1 ## minimum: 0.5 ## maximum: 64 cpu: 1 ## description: Memory, the unit is Gi. ## default: 2 ## minimum: 0.5 ## maximum: 1000 memory: 1 ## description: Storage size, the unit is Gi. 
## default: 20 ## minimum: 1 ## maximum: 10000 storage: 20 ## customized default values to override kblib chart's values extra: terminationPolicy: Delete ================================================ FILE: k8s-deploy/databases/redis/values.yaml ================================================ ## description: Cluster version. ## default: 7.2.7 version: 7.2.7 ## description: Cluster topology mode. ## default: replication ## one of: [standalone, replication, cluster, replication-twemproxy] mode: standalone ## description: The number of replicas, for standalone mode, the replicas is 1, for replication mode, the default replicas is 2. ## default: 1 ## minimum: 1 ## maximum: 5 replicas: 1 ## description: CPU cores. ## default: 0.5 ## minimum: 0.5 ## maximum: 64 cpu: 0.5 ## description: Memory, the unit is Gi. ## default: 0.5 ## minimum: 0.5 ## maximum: 1000 memory: 1 ## description: Storage size, the unit is Gi. ## default: 20 ## minimum: 1 ## maximum: 10000 storage: 20 extra: disableExporter: true ================================================ FILE: k8s-deploy/databases/scripts/common.sh ================================================ #!/bin/bash print_title() { echo "============================================" echo "$1" echo "============================================" } print_success() { echo "✅ $1" } print_error() { echo "❌ $1" } print_warning() { echo "⚠️ $1" } print_info() { echo "🔹 $1" } print() { echo "$1" } # Check dependencies check_dependencies(){ print "Checking dependencies..." command -v kubectl >/dev/null 2>&1 || { print "Error: kubectl command not found"; exit 1; } command -v helm >/dev/null 2>&1 || { print "Error: helm command not found"; exit 1; } # Check if Kubernetes is available print "Checking if Kubernetes is available..." kubectl cluster-info &>/dev/null if [ $? -ne 0 ]; then print "Error: Kubernetes cluster is not accessible. Please ensure you have proper access to a Kubernetes cluster." 
exit 1 fi print_success "Kubernetes cluster is accessible." } ================================================ FILE: k8s-deploy/databases/uninstall-kubeblocks.sh ================================================ #!/bin/bash # Get the directory where this script is located DATABASE_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" # Load configuration file source "$DATABASE_SCRIPT_DIR/00-config.sh" # Check dependencies print "Checking dependencies..." command -v kubectl >/dev/null 2>&1 || { print "Error: kubectl command not found"; exit 1; } command -v helm >/dev/null 2>&1 || { print "Error: helm command not found"; exit 1; } print "Checking if Kubernetes is available..." if ! kubectl cluster-info &>/dev/null; then print "Error: Kubernetes cluster is not accessible. Please ensure you have proper access to a Kubernetes cluster." exit 1 fi print "Checking if KubeBlocks is installed in kb-system namespace..." if ! kubectl get namespace kb-system &>/dev/null; then print "KubeBlocks is not installed in kb-system namespace." exit 0 fi # Function for uninstalling KubeBlocks uninstall_kubeblocks() { print "Uninstalling KubeBlocks..." 
# Uninstall KubeBlocks Helm chart helm uninstall kubeblocks -n kb-system # Uninstall snapshot controller helm uninstall snapshot-controller -n kb-system # Delete KubeBlocks CRDs kubectl delete -f https://github.com/apecloud/kubeblocks/releases/download/v${KB_VERSION}/kubeblocks_crds.yaml --ignore-not-found=true # Delete CSI Snapshotter CRDs kubectl delete -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.2.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml --ignore-not-found=true kubectl delete -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.2.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml --ignore-not-found=true kubectl delete -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.2.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml --ignore-not-found=true # Delete the kb-system namespace print "Waiting for resources to be removed..." kubectl delete namespace kb-system --timeout=180s print "KubeBlocks has been successfully uninstalled!" 
}

# Call the function to uninstall KubeBlocks
uninstall_kubeblocks

================================================
FILE: k8s-deploy/install_lightrag.sh
================================================
#!/bin/bash

# Deploys the databases and then the production LightRAG Helm release into
# the "rag" namespace, and finally port-forwards the service to localhost:9621.

NAMESPACE=rag

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# print / print_success are defined in the shared helper file; without this
# line bash reports "print: command not found" further down.
source "$SCRIPT_DIR/databases/scripts/common.sh"

if [ -z "$OPENAI_API_KEY" ]; then
  echo "OPENAI_API_KEY environment variable is not set"
  read -s -p "Enter your OpenAI API key: " OPENAI_API_KEY
  # read -s does not echo the newline typed by the user
  echo ""
  if [ -z "$OPENAI_API_KEY" ]; then
    echo "Error: OPENAI_API_KEY must be provided"
    exit 1
  fi
  export OPENAI_API_KEY=$OPENAI_API_KEY
fi

if [ -z "$OPENAI_API_BASE" ]; then
  echo "OPENAI_API_BASE environment variable is not set, will use default value"
  read -p "Enter OpenAI API base URL (press Enter to skip if not needed): " OPENAI_API_BASE
  # Fall back to the official endpoint so the chart never receives an empty host.
  OPENAI_API_BASE=${OPENAI_API_BASE:-https://api.openai.com/v1}
  export OPENAI_API_BASE=$OPENAI_API_BASE
fi

# Install KubeBlocks (if not already installed)
bash "$SCRIPT_DIR/databases/01-prepare.sh"

# Install database clusters
bash "$SCRIPT_DIR/databases/02-install-database.sh"

# Create vector extension in PostgreSQL if enabled
print "Waiting for PostgreSQL pods to be ready..."
if kubectl wait --for=condition=ready pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n "$NAMESPACE" --timeout=300s; then
  print "Creating vector extension in PostgreSQL..."
  PG_PRIMARY_POD=$(kubectl get pods -l kubeblocks.io/role=primary,app.kubernetes.io/instance=pg-cluster -n "$NAMESPACE" -o name)
  # No -it: the command is non-interactive and a TTY may not exist (CI, pipes).
  # IF NOT EXISTS keeps the script re-runnable once the extension is present.
  if kubectl exec "$PG_PRIMARY_POD" -n "$NAMESPACE" -- psql -c "CREATE EXTENSION IF NOT EXISTS vector;"; then
    print_success "Vector extension created successfully."
  else
    print "Warning: Failed to create the vector extension in PostgreSQL."
  fi
else
  print "Warning: PostgreSQL pods not ready within timeout. Vector extension not created."
fi

# Get database passwords from Kubernetes secrets
echo "Retrieving database credentials from Kubernetes secrets..."
POSTGRES_PASSWORD=$(kubectl get secrets -n rag pg-cluster-postgresql-account-postgres -o jsonpath='{.data.password}' | base64 -d)
if [ -z "$POSTGRES_PASSWORD" ]; then
  echo "Error: Could not retrieve PostgreSQL password. Make sure PostgreSQL is deployed and the secret exists."
  exit 1
fi
export POSTGRES_PASSWORD=$POSTGRES_PASSWORD

NEO4J_PASSWORD=$(kubectl get secrets -n rag neo4j-cluster-neo4j-account-neo4j -o jsonpath='{.data.password}' | base64 -d)
if [ -z "$NEO4J_PASSWORD" ]; then
  echo "Error: Could not retrieve Neo4J password. Make sure Neo4J is deployed and the secret exists."
  exit 1
fi
export NEO4J_PASSWORD=$NEO4J_PASSWORD

#REDIS_PASSWORD=$(kubectl get secrets -n rag redis-cluster-redis-account-default -o jsonpath='{.data.password}' | base64 -d)
#if [ -z "$REDIS_PASSWORD" ]; then
#  echo "Error: Could not retrieve Redis password. Make sure Redis is deployed and the secret exists."
#  exit 1
#fi
#export REDIS_PASSWORD=$REDIS_PASSWORD

echo "Deploying production LightRAG (using external databases)..."

if ! kubectl get namespace rag &> /dev/null; then
  echo "creating namespace 'rag'..."
  kubectl create namespace rag
fi

# Values are quoted so that secrets containing spaces or shell metacharacters
# reach helm as a single argument.
helm upgrade --install lightrag "$SCRIPT_DIR/lightrag" \
  --namespace "$NAMESPACE" \
  --set-string "env.POSTGRES_PASSWORD=$POSTGRES_PASSWORD" \
  --set-string "env.NEO4J_PASSWORD=$NEO4J_PASSWORD" \
  --set-string env.LLM_BINDING=openai \
  --set-string env.LLM_MODEL=gpt-4o-mini \
  --set-string "env.LLM_BINDING_HOST=$OPENAI_API_BASE" \
  --set-string "env.LLM_BINDING_API_KEY=$OPENAI_API_KEY" \
  --set-string env.EMBEDDING_BINDING=openai \
  --set-string env.EMBEDDING_MODEL=text-embedding-ada-002 \
  --set-string env.EMBEDDING_DIM=1536 \
  --set-string "env.EMBEDDING_BINDING_API_KEY=$OPENAI_API_KEY"
#  --set-string env.REDIS_URI="redis://default:${REDIS_PASSWORD}@redis-cluster-redis-redis:6379"

# Wait for LightRAG pod to be ready
echo ""
echo "Waiting for lightrag pod to be ready..."
if ! kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=lightrag --timeout=300s -n rag; then
  echo "Error: lightrag pod did not become ready within 300s. Inspect it with: kubectl get pods -n rag"
  exit 1
fi
echo "lightrag pod is ready"
echo ""
echo "Running Port-Forward:"
echo " kubectl --namespace rag port-forward svc/lightrag 9621:9621"
echo "==========================================="
echo ""
echo "✅ You can visit LightRAG at: http://localhost:9621"
echo ""
kubectl --namespace rag port-forward svc/lightrag 9621:9621

================================================
FILE: k8s-deploy/install_lightrag_dev.sh
================================================
#!/bin/bash

# Deploys a development LightRAG Helm release that uses the built-in
# file-based storages (no external databases) into the "rag" namespace.

NAMESPACE=rag

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Exit with status 1 when kubectl or helm is missing, or when no cluster is reachable.
check_dependencies(){
    echo "Checking dependencies..."
    command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl command not found"; exit 1; }
    command -v helm >/dev/null 2>&1 || { echo "Error: helm command not found"; exit 1; }

    # Check if Kubernetes is available
    echo "Checking if Kubernetes is available..."
    if ! kubectl cluster-info &>/dev/null; then
        echo "Error: Kubernetes cluster is not accessible. Please ensure you have proper access to a Kubernetes cluster."
        exit 1
    fi
    echo "Kubernetes cluster is accessible."
}

check_dependencies

if [ -z "$OPENAI_API_KEY" ]; then
  echo "OPENAI_API_KEY environment variable is not set"
  read -s -p "Enter your OpenAI API key: " OPENAI_API_KEY
  # read -s does not echo the newline typed by the user
  echo ""
  if [ -z "$OPENAI_API_KEY" ]; then
    echo "Error: OPENAI_API_KEY must be provided"
    exit 1
  fi
  export OPENAI_API_KEY=$OPENAI_API_KEY
fi

if [ -z "$OPENAI_API_BASE" ]; then
  echo "OPENAI_API_BASE environment variable is not set, will use default value"
  read -p "Enter OpenAI API base URL (press Enter to skip if not needed): " OPENAI_API_BASE
  # The prompt allows skipping, so apply the default here; otherwise the
  # required-variable check below would abort on the empty value.
  OPENAI_API_BASE=${OPENAI_API_BASE:-https://api.openai.com/v1}
  export OPENAI_API_BASE=$OPENAI_API_BASE
fi

required_env_vars=("OPENAI_API_BASE" "OPENAI_API_KEY")

for var in "${required_env_vars[@]}"; do
  if [ -z "${!var}" ]; then
    echo "Error: $var environment variable is not set"
    exit 1
  fi
done

if ! kubectl get namespace rag &> /dev/null; then
  echo "creating namespace 'rag'..."
  kubectl create namespace rag
fi

# Values are quoted so that keys or URLs containing shell metacharacters
# reach helm as a single argument.
helm upgrade --install lightrag-dev "$SCRIPT_DIR/lightrag" \
  --namespace rag \
  --set-string env.LIGHTRAG_KV_STORAGE=JsonKVStorage \
  --set-string env.LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage \
  --set-string env.LIGHTRAG_GRAPH_STORAGE=NetworkXStorage \
  --set-string env.LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage \
  --set-string env.LLM_BINDING=openai \
  --set-string env.LLM_MODEL=gpt-4o-mini \
  --set-string "env.LLM_BINDING_HOST=$OPENAI_API_BASE" \
  --set-string "env.LLM_BINDING_API_KEY=$OPENAI_API_KEY" \
  --set-string env.EMBEDDING_BINDING=openai \
  --set-string env.EMBEDDING_MODEL=text-embedding-ada-002 \
  --set-string env.EMBEDDING_DIM=1536 \
  --set-string "env.EMBEDDING_BINDING_API_KEY=$OPENAI_API_KEY"

# Wait for LightRAG pod to be ready
echo ""
echo "Waiting for lightrag-dev pod to be ready..."
if ! kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=lightrag-dev --timeout=300s -n rag; then
  echo "Error: lightrag-dev pod did not become ready within 300s. Inspect it with: kubectl get pods -n rag"
  exit 1
fi
echo "lightrag-dev pod is ready"
echo ""
echo "Running Port-Forward:"
echo " kubectl --namespace rag port-forward svc/lightrag-dev 9621:9621"
echo "==========================================="
echo ""
echo "✅ You can visit LightRAG at: http://localhost:9621"
echo ""
kubectl --namespace rag port-forward svc/lightrag-dev 9621:9621

================================================
FILE: k8s-deploy/lightrag/.helmignore
================================================
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store # Common VCS dirs .git/ .gitignore .bzr/ .bzrignore .hg/ .hgignore .svn/ # Common backup files *.swp *.bak *.tmp *.orig *~ # Various IDEs .project .idea/ *.tmproj .vscode/ ================================================ FILE: k8s-deploy/lightrag/Chart.yaml ================================================ apiVersion: v2 name: lightrag description: A Helm chart for LightRAG, an efficient and lightweight RAG system type: application version: 0.1.1 appVersion: "1.0.0" maintainers: - name: LightRAG Team - name: earayu email: earayu@gmail.com ================================================ FILE: k8s-deploy/lightrag/templates/NOTES.txt ================================================ =========================================== LightRAG has been successfully deployed! =========================================== View application logs: kubectl logs -f --namespace {{ .Release.Namespace }} deploy/{{ include "lightrag.fullname" . }} =========================================== Access the application: {{- if contains "NodePort" .Values.service.type }} Run these commands to get access information: ----------------------------------------- export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "lightrag.fullname" . }}) export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") echo "LightRAG is accessible at: http://$NODE_IP:$NODE_PORT" ----------------------------------------- {{- else if contains "LoadBalancer" .Values.service.type }} Run these commands to get access information (external IP may take a minute to assign): ----------------------------------------- export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "lightrag.fullname" . }} --template "{{ "{{ range (index .status.loadBalancer.ingress 0) }}{{ . 
}}{{ end }}" }}") echo "LightRAG is accessible at: http://$SERVICE_IP:{{ .Values.service.port }}" ----------------------------------------- If SERVICE_IP is empty, retry the command or check service status with: kubectl get svc --namespace {{ .Release.Namespace }} {{ include "lightrag.fullname" . }} {{- else if contains "ClusterIP" .Values.service.type }} For development environments, to access LightRAG from your local machine: 1. Run this port-forward command in your terminal: kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ include "lightrag.fullname" . }} {{ .Values.service.port }}:{{ .Values.env.PORT }} 2. While the command is running, open your browser and navigate to: http://localhost:{{ .Values.service.port }} Note: To stop port-forwarding, press Ctrl+C in the terminal. {{- end }} =========================================== ================================================ FILE: k8s-deploy/lightrag/templates/_helpers.tpl ================================================ {{/* Application name */}} {{- define "lightrag.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Full application name */}} {{- define "lightrag.fullname" -}} {{- default .Release.Name .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} {{- define "lightrag.labels" -}} app.kubernetes.io/name: {{ include "lightrag.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end }} {{/* Selector labels */}} {{- define "lightrag.selectorLabels" -}} app.kubernetes.io/name: {{ include "lightrag.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* .env file content */}} {{- define "lightrag.envContent" -}} {{- $first := true -}} {{- range $key, $val := .Values.env -}} {{- if not $first -}}{{- "\n" -}}{{- end -}} {{- $first = false -}} {{ $key }}={{ $val }} {{- end -}} {{- end -}} ================================================ FILE: k8s-deploy/lightrag/templates/deployment.yaml ================================================ apiVersion: apps/v1 kind: Deployment metadata: name: {{ include "lightrag.fullname" . }} labels: {{- include "lightrag.labels" . | nindent 4 }} spec: replicas: {{ .Values.replicaCount }} selector: matchLabels: {{- include "lightrag.selectorLabels" . | nindent 6 }} template: metadata: annotations: checksum/config: {{ include "lightrag.envContent" . | sha256sum }} labels: {{- include "lightrag.selectorLabels" . | nindent 8 }} spec: containers: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: IfNotPresent ports: - name: http containerPort: {{ .Values.env.PORT }} protocol: TCP readinessProbe: httpGet: path: /health port: http initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 2 successThreshold: 1 failureThreshold: 3 resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: - name: rag-storage mountPath: /app/data/rag_storage - name: inputs mountPath: /app/data/inputs - name: env-file mountPath: /app/.env subPath: .env {{- $envFrom := default (dict) .Values.envFrom }} {{- $envFromEntries := list }} {{- range (default (list) (index $envFrom "secrets")) }} {{- $envFromEntries = append $envFromEntries (dict "secretRef" (dict "name" .name)) }} {{- end }} {{- range (default (list) (index $envFrom "configmaps")) }} {{- $envFromEntries = append $envFromEntries (dict "configMapRef" (dict "name" .name)) }} {{- end }} {{- if gt (len $envFromEntries) 0 }} envFrom: {{- toYaml $envFromEntries | nindent 12 }} {{- end }} {{- with 
.Values.image.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} volumes: - name: env-file secret: secretName: {{ include "lightrag.fullname" . }}-env {{- if .Values.persistence.enabled }} - name: rag-storage persistentVolumeClaim: claimName: {{ include "lightrag.fullname" . }}-rag-storage - name: inputs persistentVolumeClaim: claimName: {{ include "lightrag.fullname" . }}-inputs {{- else }} - name: rag-storage emptyDir: {} - name: inputs emptyDir: {} {{- end }} strategy: {{- toYaml .Values.updateStrategy | nindent 4 }} ================================================ FILE: k8s-deploy/lightrag/templates/pvc.yaml ================================================ {{- if .Values.persistence.enabled }} --- apiVersion: v1 kind: PersistentVolumeClaim metadata: name: {{ include "lightrag.fullname" . }}-rag-storage labels: {{- include "lightrag.labels" . | nindent 4 }} spec: accessModes: - ReadWriteOnce resources: requests: storage: {{ .Values.persistence.ragStorage.size }} --- apiVersion: v1 kind: PersistentVolumeClaim metadata: name: {{ include "lightrag.fullname" . }}-inputs labels: {{- include "lightrag.labels" . | nindent 4 }} spec: accessModes: - ReadWriteOnce resources: requests: storage: {{ .Values.persistence.inputs.size }} {{- end }} ================================================ FILE: k8s-deploy/lightrag/templates/secret.yaml ================================================ apiVersion: v1 kind: Secret metadata: name: {{ include "lightrag.fullname" . }}-env labels: {{- include "lightrag.labels" . | nindent 4 }} type: Opaque stringData: .env: |- {{- include "lightrag.envContent" . | nindent 4 }} ================================================ FILE: k8s-deploy/lightrag/templates/service.yaml ================================================ apiVersion: v1 kind: Service metadata: name: {{ include "lightrag.fullname" . }} labels: {{- include "lightrag.labels" . 
| nindent 4 }} spec: type: {{ .Values.service.type }} ports: - port: {{ .Values.service.port }} targetPort: {{ .Values.env.PORT }} protocol: TCP name: http selector: {{- include "lightrag.selectorLabels" . | nindent 4 }} ================================================ FILE: k8s-deploy/lightrag/values.yaml ================================================ replicaCount: 1 image: repository: ghcr.io/hkuds/lightrag tag: latest # Optionally specify imagePullSecrets if your image is in a private registry # example: # imagePullSecrets: # - name: my-registry-secret imagePullSecrets: [] # Specify a deployment strategy # example: # updateStrategy: # type: RollingUpdate # rollingUpdate: # maxUnavailable: 25% # maxSurge: 25% # Default for now should be Recreate as any RollingUpdate will cause issues with # multiple instances trying to access the same persistent storage if not using RWX volumes. updateStrategy: type: Recreate service: type: ClusterIP port: 9621 resources: limits: cpu: 1000m memory: 2Gi requests: cpu: 500m memory: 1Gi persistence: enabled: true ragStorage: size: 10Gi inputs: size: 5Gi # Allow specifying additional environment variables from ConfigMaps or Secrets created outside of this chart envFrom: configmaps: [] # - name: my-shiny-configmap-1 secrets: [] # - name: my-shiny-secret-1 env: HOST: 0.0.0.0 PORT: 9621 WEBUI_TITLE: Graph RAG Engine WEBUI_DESCRIPTION: Simple and Fast Graph Based RAG System LLM_BINDING: openai LLM_MODEL: gpt-4o-mini LLM_BINDING_HOST: LLM_BINDING_API_KEY: EMBEDDING_BINDING: openai EMBEDDING_MODEL: text-embedding-ada-002 EMBEDDING_DIM: 1536 EMBEDDING_BINDING_API_KEY: LIGHTRAG_KV_STORAGE: PGKVStorage LIGHTRAG_VECTOR_STORAGE: PGVectorStorage # LIGHTRAG_KV_STORAGE: RedisKVStorage # LIGHTRAG_VECTOR_STORAGE: QdrantVectorDBStorage LIGHTRAG_GRAPH_STORAGE: Neo4JStorage LIGHTRAG_DOC_STATUS_STORAGE: PGDocStatusStorage # Replace with your POSTGRES credentials POSTGRES_HOST: pg-cluster-postgresql-postgresql POSTGRES_PORT: 5432 POSTGRES_USER: 
postgres POSTGRES_PASSWORD: POSTGRES_DATABASE: postgres POSTGRES_WORKSPACE: default # Replace with your NEO4J credentials NEO4J_URI: neo4j://neo4j-cluster-neo4j:7687 NEO4J_USERNAME: neo4j NEO4J_PASSWORD: # Replace with your Qdrant credentials QDRANT_URL: http://qdrant-cluster-qdrant-qdrant:6333 # REDIS_URI: redis://default:${REDIS_PASSWORD}@redis-cluster-redis-redis:6379 ================================================ FILE: k8s-deploy/uninstall_lightrag.sh ================================================ #!/bin/bash NAMESPACE=rag helm uninstall lightrag --namespace $NAMESPACE ================================================ FILE: k8s-deploy/uninstall_lightrag_dev.sh ================================================ #!/bin/bash NAMESPACE=rag helm uninstall lightrag-dev --namespace $NAMESPACE ================================================ FILE: lightrag/__init__.py ================================================ from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam __version__ = "1.4.11" __author__ = "Zirui Guo" __url__ = "https://github.com/HKUDS/LightRAG" ================================================ FILE: lightrag/api/.gitignore ================================================ inputs rag_storage ================================================ FILE: lightrag/api/README-zh.md ================================================ # LightRAG 服务器和 WebUI LightRAG 服务器旨在提供 Web 界面和 API 支持。Web 界面便于文档索引、知识图谱探索和简单的 RAG 查询界面。LightRAG 服务器还提供了与 Ollama 兼容的接口,旨在将 LightRAG 模拟为 Ollama 聊天模型。这使得 AI 聊天机器人(如 Open WebUI)可以轻松访问 LightRAG。 ![image-20250323122538997](./README.assets/image-20250323122538997.png) ![image-20250323122754387](./README.assets/image-20250323122754387.png) ![image-20250323123011220](./README.assets/image-20250323123011220.png) ## 入门指南 ### 安装 * 从 PyPI 安装 ```bash ### 使用 uv 安装 LightRAG 服务器(作为工具,推荐) uv tool install "lightrag-hku[api]" ### 或使用 pip # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install 
"lightrag-hku[api]" ``` * 从源代码安装 ```bash # 克隆仓库 git clone https://github.com/HKUDS/lightrag.git # 进入仓库目录 cd lightrag # 使用 uv (推荐) # 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境 uv sync --extra api source .venv/bin/activate # 激活虚拟环境 (Linux/macOS) # Windows 系统: .venv\Scripts\activate # 或使用 pip 与虚拟环境 # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install -e ".[api]" # 构建前端代码 cd lightrag_webui bun install --frozen-lockfile bun run build cd .. ``` ### 启动 LightRAG 服务器前的准备 LightRAG 需要同时集成 LLM(大型语言模型)和嵌入模型以有效执行文档索引和查询操作。在首次部署 LightRAG 服务器之前,必须配置 LLM 和嵌入模型的设置。LightRAG 支持绑定到各种 LLM/嵌入后端: * ollama * lollms * openai 或 openai 兼容 * azure_openai * aws_bedrock * gemini 建议使用环境变量来配置 LightRAG 服务器。项目根目录中有一个名为 `env.example` 的示例环境变量文件。请将此文件复制到启动目录并重命名为 `.env`。之后,您可以在 `.env` 文件中修改与 LLM 和嵌入模型相关的参数。需要注意的是,LightRAG 服务器每次启动时都会将 `.env` 中的环境变量加载到系统环境变量中。**LightRAG 服务器会优先使用系统环境变量中的设置**。 > 由于安装了 Python 扩展的 VS Code 可能会在集成终端中自动加载 .env 文件,请在每次修改 .env 文件后打开新的终端会话。 以下是 LLM 和嵌入模型的一些常见设置示例: * OpenAI LLM + Ollama 嵌入 ``` LLM_BINDING=openai LLM_MODEL=gpt-4o LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 # EMBEDDING_BINDING_API_KEY=your_api_key ``` > 如果改为使用 Google Gemini, 设置 `LLM_BINDING=gemini`, 选择模型 `LLM_MODEL=gemini-flash-latest`, 并设置访问密钥 `LLM_BINDING_API_KEY` (或 `GEMINI_API_KEY`). 
* Ollama LLM + Ollama 嵌入 ``` LLM_BINDING=ollama LLM_MODEL=mistral-nemo:latest LLM_BINDING_HOST=http://localhost:11434 # LLM_BINDING_API_KEY=your_api_key ### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000) OLLAMA_LLM_NUM_CTX=8192 EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 # EMBEDDING_BINDING_API_KEY=your_api_key ``` > **重要提示**:在文档索引前必须确定使用的Embedding模型,且在文档查询阶段必须沿用与索引阶段相同的模型。有些存储(例如PostgreSQL)在首次建立数表的时候需要确定向量维度,因此更换Embedding模型后需要删除向量相关库表,以便让LightRAG重建新的库表。 ### 使用 Setup 工具创建 .env 文件 除了手动编辑 `env.example` 之外,您还可以使用交互式向导生成配置好的 `.env`,并在需要时生成 `docker-compose.final.yml`: ```bash make env-base # 必跑第一步:配置 LLM、Embedding、Reranker make env-storage # 可选:配置存储后端和数据库服务 make env-server # 可选:配置服务端口、鉴权和 SSL make env-security-check # 可选:审计当前 .env 中的安全风险 ``` 每个目标的详细说明请参阅 [docs/InteractiveSetup.md](../../docs/InteractiveSetup.md)。 这些 setup 向导只负责更新配置;如需在部署前审计当前 `.env` 的安全风险,请额外运行 `make env-security-check`。 ### 启动 LightRAG 服务器 LightRAG 服务器支持两种运行模式: * 简单高效的 Uvicorn 模式 ``` lightrag-server ``` * 多进程 Gunicorn + Uvicorn 模式(生产模式,不支持 Windows 环境) ``` lightrag-gunicorn --workers 4 ``` 启动LightRAG的时候,当前工作目录必须含有`.env`配置文件。**要求将.env文件置于启动目录中是经过特意设计的**。 这样做的目的是支持用户同时启动多个LightRAG实例,并为不同实例配置不同的.env文件。**修改.env文件后,您需要重新打开终端以使新设置生效**。 这是因为每次启动时,LightRAG Server会将.env文件中的环境变量加载至系统环境变量,且系统环境变量的设置具有更高优先级。 启动时可以通过命令行参数覆盖`.env`文件中的配置。常用的命令行参数包括: - `--host`:服务器监听地址(默认:0.0.0.0) - `--port`:服务器监听端口(默认:9621) - `--timeout`:LLM 请求超时时间(默认:150 秒) - `--log-level`:日志级别(默认:INFO) - `--working-dir`:数据库持久化目录(默认:./rag_storage) - `--input-dir`:上传文件存放目录(默认:./inputs) - `--workspace`: 工作空间名称,用于逻辑上隔离多个LightRAG实例之间的数据(默认:空) ### 使用 Docker 启动 LightRAG 服务器 使用 Docker Compose 是部署和运行 LightRAG Server 最便捷的方式。 - 创建一个项目目录。 - 将 LightRAG 仓库中的 `docker-compose.yml` 文件复制到您的项目目录中。 - 准备 `.env` 文件:复制示例文件 [`env.example`](https://ai.znipower.com:5013/c/env.example) 创建自定义的 `.env` 文件,并根据您的具体需求配置 LLM 和嵌入参数。 - 通过以下命令启动 LightRAG 服务器: ```shell docker compose up # 
如果希望启动后让程序退到后台运行,需要在命令的最后添加 -d 参数 ``` > 可以通过以下链接获取官方的docker compose文件:[docker-compose.yml]( https://raw.githubusercontent.com/HKUDS/LightRAG/refs/heads/main/docker-compose.yml) 。如需获取LightRAG的历史版本镜像,可以访问以下链接: [LightRAG Docker Images]( https://github.com/HKUDS/LightRAG/pkgs/container/lightrag). 如需获取更多关于docker部署的信息,请参阅 [DockerDeployment.md](./../../docs/DockerDeployment.md). ### Nginx 反向代理配置 在 LightRAG 服务器前使用 Nginx 作为反向代理时,需要为 `/documents/upload` 端点配置 `client_max_body_size` 以处理大文件上传。如果不进行此配置,Nginx 将拒绝大于 1MB(默认限制)的文件,并在请求到达 LightRAG 之前返回 `413 Request Entity Too Large` 错误。 **推荐配置:** ```nginx server { listen 80; server_name your-domain.com; # 全局默认:8MB 用于 LLM 长上下文查询 client_max_body_size 8M; # 上传端点:100MB 用于大文件上传 location /documents/upload { client_max_body_size 100M; proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # 大文件上传需要更长超时时间 proxy_read_timeout 300s; proxy_send_timeout 300s; } # 流式端点:LLM 响应流式传输 location ~ ^/(query/stream|api/chat|api/generate) { gzip off; # 禁用流式响应的压缩 proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # LLM 生成需要较长超时 proxy_read_timeout 300s; } # 其他端点 location / { proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; } } ``` **关键要点:** 1. **全局限制(8MB)**:足以处理具有长对话历史和上下文的 LLM 查询(128K tokens ≈ 512KB + JSON 开销)。 2. **上传端点(100MB)**:必须匹配或超过 `.env` 文件中的 `MAX_UPLOAD_SIZE`。默认 `MAX_UPLOAD_SIZE` 为 100MB。 3. **流式端点**:为流式端点禁用 gzip 压缩(`gzip off`)以确保实时响应传输。LightRAG 自动设置 `X-Accel-Buffering: no` 头以禁用响应缓冲。 4. **超时设置**:大文件上传和 LLM 生成需要更长的超时时间;相应调整 `proxy_read_timeout` 和 `proxy_send_timeout`。 5. 
**大小验证层**: - Nginx 首先验证 `Content-Length` 头 - LightRAG 在上传过程中执行流式验证 - 在两层设置适当的限制可确保更好的错误消息和安全性 ### 离线部署 官方的 LightRAG Docker 镜像完全兼容离线或隔离网络环境。如需搭建自己的离线部署环境,请参考 [离线部署指南](./../../docs/OfflineDeployment.md)。 ### 启动多个LightRAG实例 有两种方式可以启动多个LightRAG实例。第一种方式是为每个实例配置一个完全独立的工作环境。此时需要为每个实例创建一个独立的工作目录,然后在这个工作目录上放置一个当前实例专用的`.env`配置文件。不同实例的配置文件中的服务器监听端口不能重复,然后在工作目录上执行 lightrag-server 启动服务即可。 第二种方式是所有实例共享一套相同的`.env`配置文件,然后通过命令行参数来为每个实例指定不同的服务器监听端口和工作空间。你可以在同一个工作目录中通过不同的命令行参数启动多个LightRAG实例。例如: ``` # 启动实例1 lightrag-server --port 9621 --workspace space1 # 启动实例2 lightrag-server --port 9622 --workspace space2 ``` 工作空间的作用是实现不同实例之间的数据隔离。因此不同实例之间的`workspace`参数必须不同,否则会导致数据混乱,数据将会被破坏。 通过 Docker Compose 启动多个 LightRAG 实例时,只需在 `docker-compose.yml` 中为每个容器指定不同的 `WORKSPACE` 和 `PORT` 环境变量即可。即使所有实例共享同一个 `.env` 文件,Compose 中定义的容器环境变量也会优先覆盖 `.env` 文件中的同名设置,从而确保每个实例拥有独立的配置。 ### LightRAG实例间的数据隔离 每个实例配置一个独立的工作目录和专用`.env`配置文件通常能够保证内存数据库中的本地持久化文件保存在各自的工作目录,实现数据的相互隔离。LightRAG默认存储全部都是内存数据库,通过这种方式进行数据隔离是没有问题的。但是如果使用的是外部数据库,如果不同实例访问的是同一个数据库实例,就需要通过配置工作空间来实现数据隔离,否则不同实例的数据将会出现冲突并被破坏。 命令行的 workspace 参数和`.env`文件中的环境变量`WORKSPACE` 都可以用于指定当前实例的工作空间名字,命令行参数的优先级别更高。下面是不同类型的存储实现工作空间的方式: - **对于本地基于文件的数据库,数据隔离通过工作空间子目录实现:** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。 - **对于将数据存储在集合(collection)中的数据库,通过在集合名称前添加工作空间前缀来实现:** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。 - **对于关系型数据库,数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离:** PGKVStorage, PGVectorStorage, PGDocStatusStorage。 * **对于Neo4j图数据库,通过label来实现数据的逻辑隔离**:Neo4JStorage * **对于OpenSearch,通过索引名称前缀实现数据隔离**:OpenSearchKVStorage、OpenSearchDocStatusStorage、OpenSearchGraphStorage、OpenSearchVectorDBStorage 为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, 
`QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `OPENSEARCH_WORKSPACE`。 ### Gunicorn + Uvicorn 的多工作进程 LightRAG 服务器可以在 `Gunicorn + Uvicorn` 预加载模式下运行。Gunicorn 的多工作进程(多进程)功能可以防止文档索引任务阻塞 RAG 查询。使用 CPU 密集型文档提取工具(如 docling)在纯 Uvicorn 模式下可能会导致整个系统被阻塞。 虽然 LightRAG 服务器使用一个工作进程来处理文档索引流程,但通过 Uvicorn 的异步任务支持,可以并行处理多个文件。文档索引速度的瓶颈主要在于 LLM。如果您的 LLM 支持高并发,您可以通过增加 LLM 的并发级别来加速文档索引。以下是几个与并发处理相关的环境变量及其默认值: ``` ### 工作进程数,数字不大于 (2 x 核心数) + 1 WORKERS=2 ### 一批中并行处理的文件数 MAX_PARALLEL_INSERT=2 # LLM 的最大并发请求数 MAX_ASYNC=4 ``` ### 将 Lightrag 安装为 Linux 服务 从示例文件 `lightrag.service.example` 创建您的服务文件 `lightrag.service`。修改服务文件中的服务启动定义: ```text # Set Enviroment to your Python virtual enviroment Environment="PATH=/home/netman/lightrag-xyj/venv/bin" WorkingDirectory=/home/netman/lightrag-xyj # ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-server ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-gunicorn ``` > ExecStart命令必须是 lightrag-gunicorn 或 lightrag-server 中的一个,不能使用其它脚本包裹它们。因为停止服务必须要求主进程必须是这两个进程。 安装 LightRAG 服务。如果您的系统是 Ubuntu,以下命令将生效: ```shell sudo cp lightrag.service /etc/systemd/system/ sudo systemctl daemon-reload sudo systemctl start lightrag.service sudo systemctl status lightrag.service sudo systemctl enable lightrag.service ``` ## Ollama 模拟 我们为 LightRAG 提供了 Ollama 兼容接口,旨在将 LightRAG 模拟为 Ollama 聊天模型。这使得支持 Ollama 的 AI 聊天前端(如 Open WebUI)可以轻松访问 LightRAG。 ### 将 Open WebUI 连接到 LightRAG 启动 lightrag-server 后,您可以在 Open WebUI 管理面板中添加 Ollama 类型的连接。然后,一个名为 `lightrag:latest` 的模型将出现在 Open WebUI 的模型管理界面中。用户随后可以通过聊天界面向 LightRAG 发送查询。对于这种用例,最好将 LightRAG 安装为服务。 Open WebUI 使用 LLM 来执行会话标题和会话关键词生成任务。因此,Ollama 聊天补全 API 会检测并将 OpenWebUI 会话相关请求直接转发给底层 LLM。Open WebUI 的截图: ![image-20250323194750379](./README.assets/image-20250323194750379.png) ### 在聊天中选择查询模式 如果您从 LightRAG 的 Ollama 接口发送消息(查询),默认查询模式是 `hybrid`。您可以通过发送带有查询前缀的消息来选择查询模式。 查询字符串中的查询前缀可以决定使用哪种 LightRAG 查询模式来生成响应。支持的前缀包括: ``` /local /global /hybrid /naive /mix /bypass /context /localcontext /globalcontext 
/hybridcontext /naivecontext /mixcontext ``` 例如,聊天消息 "/mix 唐僧有几个徒弟" 将触发 LightRAG 的混合模式查询。没有查询前缀的聊天消息默认会触发混合模式查询。 "/bypass" 不是 LightRAG 查询模式,它会告诉 API 服务器将查询连同聊天历史直接传递给底层 LLM。因此用户可以使用 LLM 基于聊天历史回答问题。如果您使用 Open WebUI 作为前端,您可以直接切换到普通 LLM 模型,而不是使用 /bypass 前缀。 "/context" 也不是 LightRAG 查询模式,它会告诉 LightRAG 只返回为 LLM 准备的上下文信息。您可以检查上下文是否符合您的需求,或者自行处理上下文。 ### 在聊天中添加用户提示词 使用LightRAG进行内容查询时,应避免将搜索过程与无关的输出处理相结合,这会显著影响查询效果。用户提示(user prompt)正是为解决这一问题而设计 -- 它不参与RAG检索阶段,而是在查询完成后指导大语言模型(LLM)如何处理检索结果。我们可以在查询前缀末尾添加方括号,从而向LLM传递用户提示词: ``` /[使用mermaid格式画图] 请画出 Scrooge 的人物关系图谱 /mix[使用mermaid格式画图] 请画出 Scrooge 的人物关系图谱 ``` ## API 密钥和认证 默认情况下,LightRAG 服务器可以在没有任何认证的情况下访问。我们可以使用 API 密钥或账户凭证配置服务器以确保其安全。 * API 密钥 ``` LIGHTRAG_API_KEY=your-secure-api-key-here WHITELIST_PATHS=/health,/api/* ``` > 健康检查和 Ollama 模拟端点默认不进行 API 密钥检查。为了安全原因,如果不需要提供Ollama服务,应该把`/api/*`从WHITELIST_PATHS中移除。 API Key使用的请求头是 `X-API-Key` 。以下是使用API访问LightRAG Server的一个例子: ``` curl -X 'POST' \ 'http://localhost:9621/documents/scan' \ -H 'accept: application/json' \ -H 'X-API-Key: your-secure-api-key-here-123' \ -d '' ``` * 账户凭证(Web 界面需要登录后才能访问) LightRAG API 服务器使用基于 HS256 算法的 JWT 认证。要启用安全访问控制,需要以下环境变量: ```bash # JWT 认证 AUTH_ACCOUNTS='admin:admin123,user1:pass456' TOKEN_SECRET='your-key' TOKEN_EXPIRE_HOURS=4 ``` > 目前仅支持配置一个管理员账户和密码。尚未开发和实现完整的账户系统。 如果未配置账户凭证,Web 界面将以访客身份访问系统。因此,即使仅配置了 API 密钥,所有 API 仍然可以通过访客账户访问,这仍然不安全。因此,要保护 API,需要同时配置这两种认证方法。 ## Azure OpenAI 后端配置 可以使用以下 Azure CLI 命令创建 Azure OpenAI API(您需要先从 [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) 安装 Azure CLI): ```bash # 根据需要更改资源组名称、位置和 OpenAI 资源名称 RESOURCE_GROUP_NAME=LightRAG LOCATION=swedencentral RESOURCE_NAME=LightRAG-OpenAI az login az group create --name $RESOURCE_GROUP_NAME --location $LOCATION az cognitiveservices account create --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --kind OpenAI --sku S0 --location swedencentral az cognitiveservices account deployment create 
--resource-group $RESOURCE_GROUP_NAME --model-format OpenAI --name $RESOURCE_NAME --deployment-name gpt-4o --model-name gpt-4o --model-version "2024-08-06" --sku-capacity 100 --sku-name "Standard" az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME --model-format OpenAI --name $RESOURCE_NAME --deployment-name text-embedding-3-large --model-name text-embedding-3-large --model-version "1" --sku-capacity 80 --sku-name "Standard" az cognitiveservices account show --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --query "properties.endpoint" az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_NAME ``` 最后两个命令的输出将分别提供 OpenAI API 的端点和密钥。您可以使用这些值在 `.env` 文件中设置环境变量。 ``` # .env 中的 Azure OpenAI 配置 LLM_BINDING=azure_openai LLM_BINDING_HOST=your-azure-endpoint LLM_MODEL=your-model-deployment-name LLM_BINDING_API_KEY=your-azure-api-key ### API Version可选,默认为最新版本 AZURE_OPENAI_API_VERSION=2024-08-01-preview ### 如果使用 Azure OpenAI 进行嵌入 EMBEDDING_BINDING=azure_openai EMBEDDING_MODEL=your-embedding-deployment-name ``` ## LightRAG 服务器详细配置 API 服务器可以通过三种方式配置(优先级从高到低): * 命令行参数 * 环境变量或 .env 文件 * Config.ini(仅用于存储配置) 大多数配置都有默认设置,详细信息请查看示例文件:`env.example`。数据存储配置也可以通过 config.ini 设置。为方便起见,提供了示例文件 `config.ini.example`。 ### 支持的 LLM 和嵌入后端 LightRAG 支持绑定到各种 LLM/嵌入后端: * ollama * openai (含openai 兼容) * azure_openai * lollms * aws_bedrock 使用环境变量 `LLM_BINDING` 或 CLI 参数 `--llm-binding` 选择 LLM 后端类型。使用环境变量 `EMBEDDING_BINDING` 或 CLI 参数 `--embedding-binding` 选择嵌入后端类型。 LLM和Embedding配置例子请查看项目根目录的 env.example 文件。OpenAI和Ollama兼容LLM接口所支持的完整配置选项可以通过以下命令查看: ``` lightrag-server --llm-binding openai --help lightrag-server --llm-binding ollama --help lightrag-server --embedding-binding ollama --help ``` > 请使用openai兼容方式访问OpenRouter、vLLM或SGLang部署的LLM。可以通过 `OPENAI_LLM_EXTRA_BODY` 环境变量给OpenRouter、vLLM或SGLang推理框架传递额外的参数,实现推理模式的关闭或者其它个性化控制。 设置 `max_tokens` 参数旨在**防止在实体关系提取阶段出现LLM 响应输出过长或无休止的循环输出的问题**。设置 `max_tokens` 参数的目的是在超时发生之前截断 LLM
输出,从而防止文档提取失败。这解决了某些包含大量实体和关系的文本块(例如表格或引文)可能导致 LLM 产生过长甚至无限循环输出的问题。此设置对于本地部署的小参数模型尤为重要。`max_tokens` 值可以按“LLM 超时时间(秒)× LLM 每秒输出 token 数”估算(例如 `150s × 60 tokens/s = 9000`): ``` # For vLLM/SGLang deployed models, or most of OpenAI compatible API provider OPENAI_LLM_MAX_TOKENS=9000 # For Ollama Deployed Models OLLAMA_LLM_NUM_PREDICT=9000 # For OpenAI o1-mini or newer models OPENAI_LLM_MAX_COMPLETION_TOKENS=9000 ``` ### 实体提取配置 * ENABLE_LLM_CACHE_FOR_EXTRACT:为实体提取启用 LLM 缓存(默认:true) 在测试环境中将 `ENABLE_LLM_CACHE_FOR_EXTRACT` 设置为 true 以减少 LLM 调用成本是很常见的做法。 ### 支持的存储类型 LightRAG 使用 4 种类型的存储用于不同目的: * KV_STORAGE:llm 响应缓存、文本块、文档信息 * VECTOR_STORAGE:实体向量、关系向量、块向量 * GRAPH_STORAGE:实体关系图 * DOC_STATUS_STORAGE:文档索引状态 每种存储类型都有多种存储实现方式。LightRAG Server默认的存储实现为内存数据库,数据通过文件持久化保存到WORKING_DIR目录。LightRAG还支持PostgreSQL、MongoDB、FAISS、Milvus、Qdrant、Neo4j、Memgraph和Redis等存储实现方式。详细的存储支持方式请参考根目录下的`README.md`文件中关于存储的相关内容。 **Milvus 索引配置:** LightRAG 现在可通过环境变量支持对 Milvus 向量存储的可配置索引类型(AUTOINDEX、HNSW、HNSW_SQ、IVF_FLAT 等)。HNSW_SQ 需要 Milvus 2.6.8 或更高版本,并能显著节省内存。有关完整的配置选项,请参阅主 README.md 文件中的“使用 Milvus 进行向量存储”部分。 您可以通过环境变量选择存储实现。例如,在首次启动 API 服务器之前,您可以将以下环境变量设置为特定的存储实现名称: ``` LIGHTRAG_KV_STORAGE=PGKVStorage LIGHTRAG_VECTOR_STORAGE=PGVectorStorage LIGHTRAG_GRAPH_STORAGE=PGGraphStorage LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage ``` 在向 LightRAG 添加文档后,您不能更改存储实现选择。目前尚不支持从一个存储实现迁移到另一个存储实现。更多配置信息请阅读示例 `env.example` 文件。 ### 在不同存储类型之间迁移LLM缓存 当LightRAG更换存储实现方式的时候,可以将LLM缓存从旧的存储迁移到新的存储。此后在新的存储上重新上传文件时,将利用原有存储的LLM缓存大幅度加快文件处理的速度。LLM缓存迁移工具的使用方法请参考[README_MIGRATE_LLM_CACHE.md](../tools/README_MIGRATE_LLM_CACHE.md) ### LightRag API 服务器命令行选项 | 参数 | 默认值 | 描述 | |-----------|---------|-------------| | --host | 0.0.0.0 | 服务器主机 | | --port | 9621 | 服务器端口 | | --working-dir | ./rag_storage | RAG 存储的工作目录 | | --input-dir | ./inputs | 包含输入文档的目录 | | --max-async | 4 | 最大异步操作数 | | --log-level | INFO | 日志级别(DEBUG、INFO、WARNING、ERROR、CRITICAL) | | --verbose | - | 详细调试输出(True、False) | | --key | None | 用于认证的 API 密钥。保护 lightrag 服务器免受未授权访问 | | --ssl | False | 启用 HTTPS | | --ssl-certfile | None | SSL
证书文件路径(如果启用 --ssl 则必需) | | --ssl-keyfile | None | SSL 私钥文件路径(如果启用 --ssl 则必需) | | --llm-binding | ollama | LLM 绑定类型(lollms、ollama、openai、openai-ollama、azure_openai、aws_bedrock) | | --embedding-binding | ollama | 嵌入绑定类型(lollms、ollama、openai、azure_openai、aws_bedrock) | ### Reranking 配置 Reranking 查询召回的块可以显著提高检索质量,它通过基于优化的相关性评分模型对文档重新排序。LightRAG 目前支持以下 rerank 提供商: - **Cohere / vLLM**:提供与 Cohere AI 的 `v2/rerank` 端点的完整 API 集成。由于 vLLM 提供了与 Cohere 兼容的 reranker API,因此也支持所有通过 vLLM 部署的 reranker 模型。 - **Jina AI**:提供与所有 Jina rerank 模型的完全实现兼容性。 - **阿里云**:具有旨在支持阿里云 rerank API 格式的自定义实现。 Rerank 提供商通过 `.env` 文件进行配置。以下是使用 vLLM 本地部署的 rerank 模型的示例配置: ``` RERANK_BINDING=cohere RERANK_MODEL=BAAI/bge-reranker-v2-m3 RERANK_BINDING_HOST=http://localhost:8000/rerank RERANK_BINDING_API_KEY=your_rerank_api_key_here ``` 以下是使用阿里云提供的 Reranker 服务的示例配置: ``` RERANK_BINDING=aliyun RERANK_MODEL=gte-rerank-v2 RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank RERANK_BINDING_API_KEY=your_rerank_api_key_here ``` 有关完整的 reranker 配置示例,请参阅 `env.example` 文件。 ### 启用 Reranking 可以按查询启用或禁用 Reranking。 `/query` 和 `/query/stream` API 端点包含一个 `enable_rerank` 参数,默认设置为 `true`,用于控制当前查询是否激活 reranking。要将 `enable_rerank` 参数的默认值更改为 `false`,请设置以下环境变量: ``` RERANK_BY_DEFAULT=False ``` ### 在参考文件中包含文本块内容 默认情况下 `/query` and `/query/stream` 端点在返回引用内容仅包括 `reference_id` 和 `file_path`. 为了评估、调试或引用的需要,你可以要求在返回的引用内容包括实际检索到的文本块内容. 
参数 `include_chunk_content` (默认值: `false`) 将控制返回的引用内容中是否包含召回文本块中的原文内容。这对于以下情形是非常有用的: - **RAG 评估**: 类似 RAGAS 这一类评估系统需要获取到召回的原文才能工作 - **Debugging**: 检查和验证生成答案时到底使用了哪些原文 - **Citation Display**: 向用户展现回答引用了哪些原文 - **Transparency**: 为RAG检索提供一个可以观察的过程 **重要**: `content` 字段是一个**字符串数组**,其中每个字符串代表来自同一文件的分块(chunk)。由于单个文件可能对应多个分块,因此内容以列表形式返回,以保留分块边界。 **API请求示例:** ```json { "query": "What is LightRAG?", "mode": "mix", "include_references": true, "include_chunk_content": true } ``` **响应示例(含文本块内容):** ```json { "response": "LightRAG is a graph-based RAG system...", "references": [ { "reference_id": "1", "file_path": "/documents/intro.md", "content": [ "LightRAG is a retrieval-augmented generation system that combines knowledge graphs with vector similarity search...", "The system uses a dual-indexing approach with both vector embeddings and graph structures for enhanced retrieval..." ] }, { "reference_id": "2", "file_path": "/documents/features.md", "content": [ "The system provides multiple query modes including local, global, hybrid, and mix modes..." ] } ] } ``` **说明**: - 此参数仅用于配合 `include_references=true` 参数工作. 如果没有包含引用参数,`include_chunk_content=true` 设置是不会生效的. - **破坏性变更**: 之前版本返回的 `content` 是一个拼接在一起的字符串。现在返回的是一个字符串数组,每个字符串代表一个分块的内容。这是为了保留分块边界,避免在合并时丢失信息。如果需要将所有分块合并为一个字符串,可使用 `"\n\n".join(content)` 等方法。 ### .env 文件示例 ```bash ### Server Configuration # HOST=0.0.0.0 PORT=9621 WORKERS=2 ### Settings for document indexing ENABLE_LLM_CACHE_FOR_EXTRACT=true SUMMARY_LANGUAGE=Chinese MAX_PARALLEL_INSERT=2 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal) TIMEOUT=150 MAX_ASYNC=4 LLM_BINDING=openai LLM_MODEL=gpt-4o-mini LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your-api-key ### Embedding Configuration (Use valid host.
For local services installed with docker, you can use host.docker.internal) # see also env.ollama-binding-options.example for fine tuning ollama EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 ### For JWT Auth # AUTH_ACCOUNTS='admin:admin123,user1:pass456' # TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx # TOKEN_EXPIRE_HOURS=48 # LIGHTRAG_API_KEY=your-secure-api-key-here-123 # WHITELIST_PATHS=/api/* # WHITELIST_PATHS=/health,/api/* ``` ## 文档和块处理逻辑说明 LightRAG 中的文档处理流程有些复杂,分为两个主要阶段:提取阶段(实体和关系提取)和合并阶段(实体和关系合并)。有两个关键参数控制流程并发性:并行处理的最大文件数(`MAX_PARALLEL_INSERT`)和最大并发 LLM 请求数(`MAX_ASYNC`)。工作流程描述如下: 1. `MAX_ASYNC` 限制系统中并发 LLM 请求的总数,包括查询、提取和合并的请求。LLM 请求具有不同的优先级:查询操作优先级最高,其次是合并,然后是提取。 2. `MAX_PARALLEL_INSERT` 控制提取阶段并行处理的文件数量。`MAX_PARALLEL_INSERT`建议设置为2~10之间,通常设置为 `MAX_ASYNC/3`,设置太大会导致合并阶段不同文档之间实体和关系重名的机会增大,降低合并阶段的效率。 3. 在单个文件中,来自不同文本块的实体和关系提取是并发处理的,并发度由 `MAX_ASYNC` 设置。只有在处理完 `MAX_ASYNC` 个文本块后,系统才会继续处理同一文件中的下一批文本块。 4. 当一个文件完成实体和关系提取后,将进入实体和关系合并阶段。这一阶段也会并发处理多个实体和关系,其并发度同样是由 `MAX_ASYNC` 控制。 5. 合并阶段的 LLM 请求的优先级别高于提取阶段,目的是让进入合并阶段的文件尽快完成处理,并让处理结果尽快更新到向量数据库中。 6. 为防止竞争条件,合并阶段会避免并发处理同一个实体或关系,当多个文件中都涉及同一个实体或关系需要合并的时候它们会串行执行。 7. 每个文件在流程中被视为一个原子处理单元。只有当其所有文本块都完成提取和合并后,文件才会被标记为成功处理。如果在处理过程中发生任何错误,整个文件将被标记为失败,并且必须重新处理。 8. 当由于错误而重新处理文件时,由于 LLM 缓存,先前处理的文本块可以快速跳过。尽管 LLM 缓存在合并阶段也会被利用,但合并顺序的不一致可能会限制其在此阶段的有效性。 9. 如果在提取过程中发生错误,系统不会保留任何中间结果。如果在合并过程中发生错误,已合并的实体和关系可能会被保留;当重新处理同一文件时,重新提取的实体和关系将与现有实体和关系合并,而不会影响查询结果。 10. 在合并阶段结束时,所有实体和关系数据都会在向量数据库中更新。如果此时发生错误,某些更新可能会被保留。但是,下一次处理尝试将覆盖先前结果,确保成功重新处理的文件不会影响未来查询结果的完整性。 大型文件应分割成较小的片段以启用增量处理。可以通过在 Web UI 上按“扫描”按钮来启动失败文件的重新处理。 ## API 端点 所有服务器(LoLLMs、Ollama、OpenAI 和 Azure OpenAI)都为 RAG 功能提供相同的 REST API 端点。当 API 服务器运行时,访问: - Swagger UI:http://localhost:9621/docs - ReDoc:http://localhost:9621/redoc 您可以使用提供的 curl 命令或通过 Swagger UI 界面测试 API 端点。确保: 1. 启动适当的后端服务(LoLLMs、Ollama 或 OpenAI) 2. 启动 RAG 服务器 3. 使用文档管理端点上传一些文档 4. 使用查询端点查询系统 5.
如果在输入目录中放入新文件,触发文档扫描 ## 异步文档索引与进度跟踪 LightRAG采用异步文档索引机制,便于前端监控和查询文档处理进度。用户通过指定端点上传文件或插入文本时,系统将返回唯一的跟踪ID,以便实时监控处理进度。 **支持生成跟踪ID的API端点:** * `/documents/upload` * `/documents/text` * `/documents/texts` **文档处理状态查询端点:** * `/track_status/{track_id}` 该端点提供全面的状态信息,包括: * 文档处理状态(待处理/处理中/已处理/失败) * 内容摘要和元数据 * 处理失败时的错误信息 * 创建和更新时间戳 ================================================ FILE: lightrag/api/README.md ================================================ # LightRAG Server and WebUI The LightRAG Server is designed to provide a Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provides an Ollama-compatible interface, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bots, such as Open WebUI, to access LightRAG easily. ![image-20250323122538997](./README.assets/image-20250323122538997.png) ![image-20250323122754387](./README.assets/image-20250323122754387.png) ![image-20250323123011220](./README.assets/image-20250323123011220.png) ## Getting Started ### Installation * Install from PyPI ```bash ### Install LightRAG Server as tool using uv (recommended) uv tool install "lightrag-hku[api]" ### Or using pip # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install "lightrag-hku[api]" ``` * Installation from Source ```bash # Clone the repository git clone https://github.com/HKUDS/lightrag.git # Change to the repository directory cd lightrag # Using uv (recommended) # Note: uv sync automatically creates a virtual environment in .venv/ uv sync --extra api source .venv/bin/activate # Activate the virtual environment (Linux/macOS) # Or on Windows: .venv\Scripts\activate # Or using pip with virtual environment # python -m venv .venv # source .venv/bin/activate # Windows: .venv\Scripts\activate # pip install -e ".[api]" # Build front-end artifacts cd lightrag_webui bun install --frozen-lockfile bun run build cd .. 
``` ### Before Starting LightRAG Server LightRAG necessitates the integration of both an LLM (Large Language Model) and an Embedding Model to effectively execute document indexing and querying operations. Prior to the initial deployment of the LightRAG server, it is essential to configure the settings for both the LLM and the Embedding Model. LightRAG supports binding to various LLM/Embedding backends: * ollama * lollms * openai or openai compatible * azure_openai * aws_bedrock * gemini It is recommended to use environment variables to configure the LightRAG Server. There is an example environment variable file named `env.example` in the root directory of the project. Please copy this file to the startup directory and rename it to `.env`. After that, you can modify the parameters related to the LLM and Embedding models in the `.env` file. It is important to note that the LightRAG Server will load the environment variables from `.env` into the system environment variables each time it starts. **LightRAG Server will prioritize the settings in the system environment variables over those in the `.env` file**. > Since VS Code with the Python extension may automatically load the .env file in the integrated terminal, please open a new terminal session after each modification to the .env file. Here are some examples of common settings for LLM and Embedding models: * OpenAI LLM + Ollama Embedding: ``` LLM_BINDING=openai LLM_MODEL=gpt-4o LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 # EMBEDDING_BINDING_API_KEY=your_api_key ``` > When targeting Google Gemini, set `LLM_BINDING=gemini`, choose a model such as `LLM_MODEL=gemini-flash-latest`, and provide your Gemini key via `LLM_BINDING_API_KEY` (or `GEMINI_API_KEY`).
* Ollama LLM + Ollama Embedding: ``` LLM_BINDING=ollama LLM_MODEL=mistral-nemo:latest LLM_BINDING_HOST=http://localhost:11434 # LLM_BINDING_API_KEY=your_api_key ### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000) OLLAMA_LLM_NUM_CTX=16384 EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 # EMBEDDING_BINDING_API_KEY=your_api_key ``` > **Important Note**: The Embedding model must be determined before document indexing, and the same model must be used during the document query phase. For certain storage solutions (e.g., PostgreSQL), the vector dimension must be defined upon initial table creation. Therefore, when changing embedding models, it is necessary to delete the existing vector-related tables and allow LightRAG to recreate them with the new dimensions. ### Create .env File With Setup Tool Instead of editing `env.example` by hand, you can use the interactive setup wizard to generate a configured `.env` and, when needed, `docker-compose.final.yml`: ```bash make env-base # Required first step: LLM, embedding, reranker make env-storage # Optional: storage backends and database services make env-server # Optional: server port, auth, and SSL make env-security-check # Optional: audit the current .env for security risks ``` For a full description of every target and what each flow does, see [docs/InteractiveSetup.md](../../docs/InteractiveSetup.md). The setup wizards update configuration only; run `make env-security-check` separately to audit the current `.env` for security risks before deployment. ### Starting LightRAG Server The LightRAG Server supports two operational modes: * The simple and efficient Uvicorn mode: ``` lightrag-server ``` * The multiprocess Gunicorn + Uvicorn mode (production mode, not supported on Windows environments): ``` lightrag-gunicorn --workers 4 ``` When starting LightRAG, the current working directory must contain the `.env` configuration file. 
**It is intentionally designed that the `.env` file must be placed in the startup directory**. The purpose of this is to allow users to launch multiple LightRAG instances simultaneously and configure different `.env` files for different instances. **After modifying the `.env` file, you need to reopen the terminal for the new settings to take effect.** This is because each time LightRAG Server starts, it loads the environment variables from the `.env` file into the system environment variables, and system environment variables have higher precedence. During startup, configurations in the `.env` file can be overridden by command-line parameters. Common command-line parameters include: - `--host`: Server listening address (default: 0.0.0.0) - `--port`: Server listening port (default: 9621) - `--timeout`: LLM request timeout (default: 150 seconds) - `--log-level`: Log level (default: INFO) - `--working-dir`: Database persistence directory (default: ./rag_storage) - `--input-dir`: Directory for uploaded files (default: ./inputs) - `--workspace`: Workspace name, used to logically isolate data between multiple LightRAG instances (default: empty) ### Launching LightRAG Server with Docker Using Docker Compose is the most convenient way to deploy and run the LightRAG Server. - Create a project directory. - Copy the `docker-compose.yml` file from the LightRAG repository into your project directory. - Prepare the `.env` file: Duplicate the sample file [`env.example`](https://ai.znipower.com:5013/c/env.example)to create a customized `.env` file, and configure the LLM and embedding parameters according to your specific requirements. - Start the LightRAG Server with the following command: ```shell docker compose up # If you want the program to run in the background after startup, add the -d parameter at the end of the command. 
``` You can get the official docker compose file from here: [docker-compose.yml](https://raw.githubusercontent.com/HKUDS/LightRAG/refs/heads/main/docker-compose.yml). For historical versions of LightRAG docker images, visit this link: [LightRAG Docker Images](https://github.com/HKUDS/LightRAG/pkgs/container/lightrag). For more details about docker deployment, please refer to [DockerDeployment.md](./../../docs/DockerDeployment.md). ### Nginx Reverse Proxy Configuration When using Nginx as a reverse proxy in front of LightRAG Server, you need to configure `client_max_body_size` for the `/documents/upload` endpoint to handle large file uploads. Without this configuration, Nginx will reject files larger than 1MB (the default limit) with a `413 Request Entity Too Large` error before the request reaches LightRAG. **Recommended Configuration:** ```nginx server { listen 80; server_name your-domain.com; # Global default: 8MB for LLM queries with long context client_max_body_size 8M; # Upload endpoint: 100MB for large file uploads location /documents/upload { client_max_body_size 100M; proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # Increase timeouts for large file uploads proxy_read_timeout 300s; proxy_send_timeout 300s; } # Streaming endpoints: LLM response streaming location ~ ^/(query/stream|api/chat|api/generate) { gzip off; # Disable compression for streaming responses proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # Long timeout for LLM generation proxy_read_timeout 300s; } # Other endpoints location / { proxy_pass http://localhost:9621; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; } } ``` **Key Points:** 1. **Global Limit (8MB)**: Sufficient for LLM queries with long conversation history and context (128K tokens ≈ 512KB + JSON overhead). 2. **Upload Endpoint (100MB)**: Must match or exceed `MAX_UPLOAD_SIZE` in your `.env` file. The default `MAX_UPLOAD_SIZE` is 100MB. 3. **Streaming Endpoints**: Disable gzip compression (`gzip off`) for streaming endpoints to ensure real-time response delivery. LightRAG automatically sets `X-Accel-Buffering: no` header to disable response buffering. 4. **Timeout Settings**: Large file uploads and LLM generation require longer timeouts; adjust `proxy_read_timeout` and `proxy_send_timeout` accordingly. 5. **Size Validation Layers**: - Nginx validates the `Content-Length` header first - LightRAG performs streaming validation during upload - Setting appropriate limits at both layers ensures better error messages and security ### Offline Deployment Official LightRAG Docker images are fully compatible with offline or air-gapped environments. If you want to build your own offline environment, please refer to [Offline Deployment Guide](./../../docs/OfflineDeployment.md). ### Starting Multiple LightRAG Instances There are two ways to start multiple LightRAG instances. The first way is to configure a completely independent working environment for each instance. This requires creating a separate working directory for each instance and placing a dedicated `.env` configuration file in that directory. The server listening ports in the configuration files of different instances cannot be the same. Then, you can start the service by running `lightrag-server` in the working directory. The second way is for all instances to share the same set of `.env` configuration files, and then use command-line arguments to specify different server listening ports and workspaces for each instance.
You can start multiple LightRAG instances in the same working directory with different command-line arguments. For example: ``` # Start instance 1 lightrag-server --port 9621 --workspace space1 # Start instance 2 lightrag-server --port 9622 --workspace space2 ``` The purpose of a workspace is to achieve data isolation between different instances. Therefore, the `workspace` parameter must be different for different instances; otherwise, it will lead to data confusion and corruption. When launching multiple LightRAG instances via Docker Compose, simply specify unique `WORKSPACE` and `PORT` environment variables for each container within your `docker-compose.yml`. Even if all instances share a common `.env` file, the container-specific environment variables defined in Compose will take precedence, ensuring independent configurations for each instance. ### Data Isolation Between LightRAG Instances Configuring an independent working directory and a dedicated `.env` configuration file for each instance can generally ensure that locally persisted files in the in-memory database are saved in their respective working directories, achieving data isolation. By default, LightRAG uses all in-memory databases, and this method of data isolation is sufficient. However, if you are using an external database, and different instances access the same database instance, you need to use workspaces to achieve data isolation; otherwise, the data of different instances will conflict and be destroyed. The command-line `workspace` argument and the `WORKSPACE` environment variable in the `.env` file can both be used to specify the workspace name for the current instance, with the command-line argument having higher priority. 
Here is how workspaces are implemented for different types of storage: - **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`. - **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`. - **For Qdrant vector database, data isolation is achieved through payload-based partitioning (Qdrant's recommended multitenancy approach):** `QdrantVectorDBStorage` uses shared collections with payload filtering for unlimited workspace scalability. - **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`. - **For graph databases, logical data isolation is achieved through labels:** `Neo4JStorage`, `MemgraphStorage` - **For OpenSearch, data isolation is achieved through index name prefixes:** `OpenSearchKVStorage`, `OpenSearchDocStatusStorage`, `OpenSearchGraphStorage`, `OpenSearchVectorDBStorage` To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `MEMGRAPH_WORKSPACE`, `OPENSEARCH_WORKSPACE`. ### Multiple workers for Gunicorn + Uvicorn The LightRAG Server can operate in the `Gunicorn + Uvicorn` preload mode. 
Gunicorn's multiple worker (multiprocess) capability prevents document indexing tasks from blocking RAG queries. Using CPU-intensive document extraction tools, such as docling, can lead to the entire system being blocked in pure Uvicorn mode. Though LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values: ``` ### Number of worker processes, not greater than (2 x number_of_cores) + 1 WORKERS=2 ### Number of parallel files to process in one batch MAX_PARALLEL_INSERT=2 ### Max concurrent requests to the LLM MAX_ASYNC=4 ``` ### Install LightRAG as a Linux Service Create your service file `lightrag.service` from the sample file: `lightrag.service.example`. Modify the start options in the service file: ```text # Set Environment to your Python virtual environment Environment="PATH=/home/netman/lightrag-xyj/venv/bin" WorkingDirectory=/home/netman/lightrag-xyj # ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-server ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-gunicorn ``` > The ExecStart command must be either `lightrag-gunicorn` or `lightrag-server`; no wrapper scripts are allowed. This is because service termination requires the main process to be one of these two executables. Install LightRAG service. 
If your system is Ubuntu, the following commands will work: ```shell sudo cp lightrag.service /etc/systemd/system/ sudo systemctl daemon-reload sudo systemctl start lightrag.service sudo systemctl status lightrag.service sudo systemctl enable lightrag.service ``` ## Ollama Emulation We provide Ollama-compatible interfaces for LightRAG, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat frontends supporting Ollama, such as Open WebUI, to access LightRAG easily. ### Connect Open WebUI to LightRAG After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. And then a model named `lightrag:latest` will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface. You should install LightRAG as a service for this use case. Open WebUI uses an LLM to do the session title and session keyword generation task. So the Ollama chat completion API detects and forwards OpenWebUI session-related requests directly to the underlying LLM. Screenshot from Open WebUI: ![image-20250323194750379](./README.assets/image-20250323194750379.png) ### Choose Query mode in chat The default query mode is `hybrid` if you send a message (query) from the Ollama interface of LightRAG. You can select query mode by sending a message with a query prefix. A query prefix in the query string can determine which LightRAG query mode is used to generate the response for the query. The supported prefixes include: ``` /local /global /hybrid /naive /mix /bypass /context /localcontext /globalcontext /hybridcontext /naivecontext /mixcontext ``` For example, the chat message `/mix What's LightRAG?` will trigger a mix mode query for LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default. `/bypass` is not a LightRAG query mode; it will tell the API Server to pass the query directly to the underlying LLM, including the chat history. 
So the user can use the LLM to answer questions based on the chat history. If you are using Open WebUI as a front end, you can just switch the model to a normal LLM instead of using the `/bypass` prefix. `/context` is also not a LightRAG query mode; it will tell LightRAG to return only the context information prepared for the LLM. You can check the context if it's what you want, or process the context by yourself. ### Add user prompt in chat When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly impacts query effectiveness. User prompt is specifically designed to address this issue — it does not participate in the RAG retrieval phase, but rather guides the LLM on how to process the retrieved results after the query is completed. We can append square brackets to the query prefix to provide the LLM with the user prompt: ``` /[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge /mix[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge ``` ## API Key and Authentication By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API Key or account credentials to secure it. * API Key: ``` LIGHTRAG_API_KEY=your-secure-api-key-here WHITELIST_PATHS=/health,/api/* ``` > Health check and Ollama emulation endpoints are excluded from API Key check by default. For security reasons, remove `/api/*` from `WHITELIST_PATHS` if the Ollama service is not required. The API key is passed using the request header `X-API-Key`. 
Below is an example of accessing the LightRAG Server via API: ``` curl -X 'POST' \ 'http://localhost:9621/documents/scan' \ -H 'accept: application/json' \ -H 'X-API-Key: your-secure-api-key-here-123' \ -d '' ``` * Account credentials (the Web UI requires login before access can be granted): LightRAG API Server implements JWT-based authentication using the HS256 algorithm. To enable secure access control, the following environment variables are required: ```bash # For jwt auth AUTH_ACCOUNTS='admin:admin123,user1:pass456' TOKEN_SECRET='your-key' TOKEN_EXPIRE_HOURS=4 ``` > Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented. If Account credentials are not configured, the Web UI will access the system as a Guest. Therefore, even if only an API Key is configured, all APIs can still be accessed through the Guest account, which remains insecure. Hence, to safeguard the API, it is necessary to configure both authentication methods simultaneously. 
## For Azure OpenAI Backend Azure OpenAI API can be created using the following commands in Azure CLI (you need to install Azure CLI first from [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)): ```bash # Change the resource group name, location, and OpenAI resource name as needed RESOURCE_GROUP_NAME=LightRAG LOCATION=swedencentral RESOURCE_NAME=LightRAG-OpenAI az login az group create --name $RESOURCE_GROUP_NAME --location $LOCATION az cognitiveservices account create --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --kind OpenAI --sku S0 --location swedencentral az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME --model-format OpenAI --name $RESOURCE_NAME --deployment-name gpt-4o --model-name gpt-4o --model-version "2024-08-06" --sku-capacity 100 --sku-name "Standard" az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME --model-format OpenAI --name $RESOURCE_NAME --deployment-name text-embedding-3-large --model-name text-embedding-3-large --model-version "1" --sku-capacity 80 --sku-name "Standard" az cognitiveservices account show --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --query "properties.endpoint" az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_NAME ``` The output of the last command will give you the endpoint and the key for the OpenAI API. You can use these values to set the environment variables in the `.env` file. 
``` # Azure OpenAI Configuration in .env: LLM_BINDING=azure_openai LLM_BINDING_HOST=your-azure-endpoint LLM_MODEL=your-model-deployment-name LLM_BINDING_API_KEY=your-azure-api-key ### API version is optional, defaults to latest version AZURE_OPENAI_API_VERSION=2024-08-01-preview ### If using Azure OpenAI for embeddings EMBEDDING_BINDING=azure_openai EMBEDDING_MODEL=your-embedding-deployment-name ``` ## LightRAG Server Configuration in Detail The API Server can be configured in three ways (highest priority first): * Command line arguments * Environment variables or .env file * Config.ini (Only for storage configuration) Most of the configurations come with default settings; check out the details in the sample file: `env.example`. Data storage configuration can also be set by config.ini. A sample file `config.ini.example` is provided for your convenience. ### LLM and Embedding Backend Supported LightRAG supports binding to various LLM/Embedding backends: * ollama * openai (including openai compatible) * azure_openai * lollms * aws_bedrock Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the Embedding backend type. For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands: ``` lightrag-server --llm-binding openai --help lightrag-server --llm-binding ollama --help lightrag-server --embedding-binding ollama --help ``` > Please use the OpenAI-compatible method to access LLMs deployed by OpenRouter or vLLM/SGLang. You can pass additional parameters to OpenRouter or vLLM/SGLang through the `OPENAI_LLM_EXTRA_BODY` environment variable to disable reasoning mode or achieve other personalized controls. 
Set the max_tokens to **prevent excessively long or endless output loops** during the entity relationship extraction phase for Large Language Model (LLM) responses. The purpose of setting the max_tokens parameter is to truncate LLM output before timeouts occur, thereby preventing document extraction failures. This addresses issues where certain text blocks (e.g., tables or citations) containing numerous entities and relationships can lead to overly long or even endless loop outputs from LLMs. This setting is particularly crucial for locally deployed, smaller-parameter models. Max tokens value can be calculated by this formula: `LLM_TIMEOUT * llm_output_tokens/second` (e.g. `180s * 50 tokens/s = 9000`) ``` # For vLLM/SGLang deployed models, or most OpenAI-compatible API providers OPENAI_LLM_MAX_TOKENS=9000 # For Ollama deployed models OLLAMA_LLM_NUM_PREDICT=9000 # For OpenAI o1-mini or newer models OPENAI_LLM_MAX_COMPLETION_TOKENS=9000 ``` ### Entity Extraction Configuration * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true) It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true for a test environment to reduce the cost of LLM calls. ### Storage Types Supported LightRAG uses 4 types of storage for different purposes: * KV_STORAGE: llm response cache, text chunks, document information * VECTOR_STORAGE: entities vectors, relation vectors, chunks vectors * GRAPH_STORAGE: entity relation graph * DOC_STATUS_STORAGE: document indexing status LightRAG Server offers various storage implementations, with the default being an in-memory database that persists data to the WORKING_DIR directory. Additionally, LightRAG supports a wide range of storage solutions including PostgreSQL, MongoDB, FAISS, Milvus, Qdrant, Neo4j, Memgraph, and Redis. For detailed information on supported storage options, please refer to the storage section in the README.md file located in the root directory. 
**Milvus Index Configuration:** LightRAG now supports configurable index types for Milvus vector storage (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.) through environment variables. HNSW_SQ requires Milvus 2.6.8+ and provides significant memory savings. See the "Using Milvus for Vector Storage" section in the main README.md for complete configuration options. You can select the storage implementation by configuring environment variables. For instance, prior to the initial launch of the API server, you can set the following environment variable to specify your desired storage implementation: ``` LIGHTRAG_KV_STORAGE=PGKVStorage LIGHTRAG_VECTOR_STORAGE=PGVectorStorage LIGHTRAG_GRAPH_STORAGE=PGGraphStorage LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage ``` You cannot change storage implementation selection after adding documents to LightRAG. Data migration from one storage implementation to another is not supported yet. For further information, please read the sample env file or config.ini file. ### LLM Cache Migration Between Storage Types When switching the storage implementation in LightRAG, the LLM cache can be migrated from the existing storage to the new one. Subsequently, when re-uploading files to the new storage, the pre-existing LLM cache will significantly accelerate file processing. 
For detailed instructions on using the LLM cache migration tool, please refer to [README_MIGRATE_LLM_CACHE.md](../tools/README_MIGRATE_LLM_CACHE.md) ### LightRAG API Server Command Line Options | Parameter | Default | Description | | --------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | | --host | 0.0.0.0 | Server host | | --port | 9621 | Server port | | --working-dir | ./rag_storage | Working directory for RAG storage | | --input-dir | ./inputs | Directory containing input documents | | --max-async | 4 | Maximum number of async operations | | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) | | --verbose | - | Verbose debug output (True, False) | | --key | None | API key for authentication. Protects the LightRAG server against unauthorized access | | --ssl | False | Enable HTTPS | | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) | | --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) | | --llm-binding | ollama | LLM binding type (lollms, ollama, openai, openai-ollama, azure_openai, aws_bedrock) | | --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai, aws_bedrock) | ### Reranking Configuration Reranking query-recalled chunks can significantly enhance retrieval quality by re-ordering documents based on an optimized relevance scoring model. LightRAG currently supports the following rerank providers: - **Cohere / vLLM**: Offers full API integration with Cohere AI's `v2/rerank` endpoint. As vLLM provides a Cohere-compatible reranker API, all reranker models deployed via vLLM are also supported. - **Jina AI**: Provides complete implementation compatibility with all Jina rerank models. - **Aliyun**: Features a custom implementation designed to support Aliyun's rerank API format. 
The rerank provider is configured via the `.env` file. Below is an example configuration for a rerank model deployed locally using vLLM: ``` RERANK_BINDING=cohere RERANK_MODEL=BAAI/bge-reranker-v2-m3 RERANK_BINDING_HOST=http://localhost:8000/rerank RERANK_BINDING_API_KEY=your_rerank_api_key_here ``` Here is an example configuration for utilizing the Reranker service provided by Aliyun: ``` RERANK_BINDING=aliyun RERANK_MODEL=gte-rerank-v2 RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank RERANK_BINDING_API_KEY=your_rerank_api_key_here ``` For comprehensive reranker configuration examples, please refer to the `env.example` file. ### Enable Reranking Reranking can be enabled or disabled on a per-query basis. The `/query` and `/query/stream` API endpoints include an `enable_rerank` parameter, which is set to `true` by default, controlling whether reranking is active for the current query. To change the default value of the `enable_rerank` parameter to `false`, set the following environment variable: ``` RERANK_BY_DEFAULT=False ``` ### Include Chunk Content in References By default, the `/query` and `/query/stream` endpoints return references with only `reference_id` and `file_path`. For evaluation, debugging, or citation purposes, you can request the actual retrieved chunk content to be included in references. The `include_chunk_content` parameter (default: `false`) controls whether the actual text content of retrieved chunks is included in the response references. 
This is particularly useful for: - **RAG Evaluation**: Testing systems like RAGAS that need access to retrieved contexts - **Debugging**: Verifying what content was actually used to generate the answer - **Citation Display**: Showing users the exact text passages that support the response - **Transparency**: Providing full visibility into the RAG retrieval process **Important**: The `content` field is an **array of strings**, where each string represents a chunk from the same file. A single file may correspond to multiple chunks, so the content is returned as a list to preserve chunk boundaries. **Example API Request:** ```json { "query": "What is LightRAG?", "mode": "mix", "include_references": true, "include_chunk_content": true } ``` **Example Response (with chunk content):** ```json { "response": "LightRAG is a graph-based RAG system...", "references": [ { "reference_id": "1", "file_path": "/documents/intro.md", "content": [ "LightRAG is a retrieval-augmented generation system that combines knowledge graphs with vector similarity search...", "The system uses a dual-indexing approach with both vector embeddings and graph structures for enhanced retrieval..." ] }, { "reference_id": "2", "file_path": "/documents/features.md", "content": [ "The system provides multiple query modes including local, global, hybrid, and mix modes..." ] } ] } ``` **Notes**: - This parameter only works when `include_references=true`. Setting `include_chunk_content=true` without including references has no effect. - **Breaking Change**: Prior versions returned `content` as a single concatenated string. Now it returns an array of strings to preserve individual chunk boundaries. If you need a single string, join the array elements with your preferred separator (e.g., `"\n\n".join(content)`). 
### .env Examples ```bash ### Server Configuration # HOST=0.0.0.0 PORT=9621 WORKERS=2 ### Settings for document indexing ENABLE_LLM_CACHE_FOR_EXTRACT=true SUMMARY_LANGUAGE=Chinese MAX_PARALLEL_INSERT=2 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal) TIMEOUT=150 MAX_ASYNC=4 LLM_BINDING=openai LLM_MODEL=gpt-4o-mini LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your-api-key ### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal) # see also env.ollama-binding-options.example for fine tuning ollama EMBEDDING_MODEL=bge-m3:latest EMBEDDING_DIM=1024 EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 ### For JWT Auth # AUTH_ACCOUNTS='admin:admin123,user1:pass456' # TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx # TOKEN_EXPIRE_HOURS=48 # LIGHTRAG_API_KEY=your-secure-api-key-here-123 # WHITELIST_PATHS=/api/* # WHITELIST_PATHS=/health,/api/* ``` ## Document and Chunk Processing The document processing pipeline in LightRAG is somewhat complex and is divided into two primary stages: the Extraction stage (entity and relationship extraction) and the Merging stage (entity and relationship merging). There are two key parameters that control pipeline concurrency: the maximum number of files processed in parallel (MAX_PARALLEL_INSERT) and the maximum number of concurrent LLM requests (MAX_ASYNC). The workflow is described as follows: 1. MAX_ASYNC limits the total number of concurrent LLM requests in the system, including those for querying, extraction, and merging. LLM requests have different priorities: query operations have the highest priority, followed by merging, and then extraction. 2. MAX_PARALLEL_INSERT controls the number of files processed in parallel during the extraction stage. For optimal performance, MAX_PARALLEL_INSERT is recommended to be set between 2 and 10, typically MAX_ASYNC/3. 
Setting this value too high can increase the likelihood of naming conflicts among entities and relationships across different documents during the merge phase, thereby reducing its overall efficiency. 3. Within a single file, entity and relationship extractions from different text blocks are processed concurrently, with the degree of concurrency set by MAX_ASYNC. Only after MAX_ASYNC text blocks are processed will the system proceed to the next batch within the same file. 4. When a file completes entity and relationship extraction, it enters the entity and relationship merging stage. This stage also processes multiple entities and relationships concurrently, with the concurrency level also controlled by `MAX_ASYNC`. 5. LLM requests for the merging stage are prioritized over the extraction stage to ensure that files in the merging phase are processed quickly and their results are promptly updated in the vector database. 6. To prevent race conditions, the merging stage avoids concurrent processing of the same entity or relationship. When multiple files involve the same entity or relationship that needs to be merged, they are processed serially. 7. Each file is treated as an atomic processing unit in the pipeline. A file is marked as successfully processed only after all its text blocks have completed extraction and merging. If any error occurs during processing, the entire file is marked as failed and must be reprocessed. 8. When a file is reprocessed due to errors, previously processed text blocks can be quickly skipped thanks to LLM caching. Although LLM cache is also utilized during the merging stage, inconsistencies in merging order may limit its effectiveness in this stage. 9. If an error occurs during extraction, the system does not retain any intermediate results. 
If an error occurs during merging, already merged entities and relationships might be preserved; when the same file is reprocessed, re-extracted entities and relationships will be merged with the existing ones, without impacting the query results. 10. At the end of the merging stage, all entity and relationship data are updated in the vector database. Should an error occur at this point, some updates may be retained. However, the next processing attempt will overwrite previous results, ensuring that successfully reprocessed files do not affect the integrity of future query results. Large files should be divided into smaller segments to enable incremental processing. Reprocessing of failed files can be initiated by pressing the "Scan" button on the web UI. ## API Endpoints All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When the API Server is running, visit: - Swagger UI: http://localhost:9621/docs - ReDoc: http://localhost:9621/redoc You can test the API endpoints using the provided curl commands or through the Swagger UI interface. Make sure to: 1. Start the appropriate backend service (LoLLMs, Ollama, or OpenAI) 2. Start the RAG server 3. Upload some documents using the document management endpoints 4. Query the system using the query endpoints 5. Trigger document scan if new files are put into the inputs directory ## Asynchronous Document Indexing with Progress Tracking LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring. 
class TokenPayload(BaseModel):
    """Claims carried inside a JWT issued by ``AuthHandler.create_token``."""

    sub: str  # Username
    exp: datetime  # Expiration time
    role: str = "user"  # User role, default is regular user
    metadata: dict = {}  # Additional metadata


class AuthHandler:
    """Creates and validates JWTs using the settings found on ``global_args``.

    Attributes:
        secret: Signing key (``global_args.token_secret``).
        algorithm: JWT signing algorithm (``global_args.jwt_algorithm``).
        expire_hours: Default lifetime, in hours, for non-guest tokens.
        guest_expire_hours: Default lifetime, in hours, for guest tokens.
        accounts: Mapping of username -> password parsed from
            ``global_args.auth_accounts`` (``"user1:pass1,user2:pass2"``).
    """

    def __init__(self):
        self.secret = global_args.token_secret
        self.algorithm = global_args.jwt_algorithm
        self.expire_hours = global_args.token_expire_hours
        self.guest_expire_hours = global_args.guest_token_expire_hours
        self.accounts = {}
        auth_accounts = global_args.auth_accounts
        if auth_accounts:
            for account in auth_accounts.split(","):
                # Tolerate blank entries such as the one left by a trailing comma.
                if not account.strip():
                    continue
                # Split on the first colon only so passwords may contain ':'.
                username, separator, password = account.partition(":")
                if not separator:
                    # Same exception type as the former tuple-unpack failure,
                    # but with an actionable message (entry is not echoed, it
                    # may contain a credential).
                    raise ValueError(
                        "Invalid auth_accounts entry: expected 'username:password'"
                    )
                self.accounts[username] = password

    def create_token(
        self,
        username: str,
        role: str = "user",
        custom_expire_hours: int = None,
        metadata: dict = None,
    ) -> str:
        """
        Create JWT token

        Args:
            username: Username
            role: User role, default is "user", guest is "guest"
            custom_expire_hours: Custom expiration time (hours), if None use default value
            metadata: Additional metadata

        Returns:
            str: Encoded JWT token
        """
        # Choose default expiration time based on role
        if custom_expire_hours is None:
            if role == "guest":
                expire_hours = self.guest_expire_hours
            else:
                expire_hours = self.expire_hours
        else:
            expire_hours = custom_expire_hours

        expire = datetime.utcnow() + timedelta(hours=expire_hours)

        # Create payload
        payload = TokenPayload(
            sub=username, exp=expire, role=role, metadata=metadata or {}
        )

        return jwt.encode(payload.dict(), self.secret, algorithm=self.algorithm)

    def validate_token(self, token: str) -> dict:
        """
        Validate JWT token

        Args:
            token: JWT token

        Returns:
            dict: Dictionary with "username", "role", "metadata" and "exp"
                (expiration time as a naive UTC datetime)

        Raises:
            HTTPException: 401 "Token expired" if the token's expiration time
                has passed; 401 "Invalid token" if it cannot be decoded or is
                missing the "exp"/"sub" claims
        """
        try:
            payload = jwt.decode(token, self.secret, algorithms=[self.algorithm])
            expire_timestamp = payload["exp"]
            expire_time = datetime.utcfromtimestamp(expire_timestamp)

            # Kept as a second line of defence; jwt.decode normally rejects
            # expired tokens before this point.
            if datetime.utcnow() > expire_time:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
                )

            # Return complete payload instead of just username
            return {
                "username": payload["sub"],
                "role": payload.get("role", "user"),
                "metadata": payload.get("metadata", {}),
                "exp": expire_time,
            }
        except jwt.ExpiredSignatureError:
            # Must precede the generic handler: ExpiredSignatureError is a
            # PyJWTError subclass, so expired tokens were previously reported
            # as "Invalid token".
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
            )
        except (jwt.PyJWTError, KeyError):
            # KeyError: correctly signed token that lacks "exp" or "sub".
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
            )
""" import os import re import argparse import logging from dotenv import load_dotenv from lightrag.utils import get_env_value from lightrag.llm.binding_options import ( GeminiEmbeddingOptions, GeminiLLMOptions, OllamaEmbeddingOptions, OllamaLLMOptions, OpenAILLMOptions, ) from lightrag.base import OllamaServerInfos import sys from lightrag.constants import ( DEFAULT_WOKERS, DEFAULT_TIMEOUT, DEFAULT_TOP_K, DEFAULT_CHUNK_TOP_K, DEFAULT_HISTORY_TURNS, DEFAULT_MAX_ENTITY_TOKENS, DEFAULT_MAX_RELATION_TOKENS, DEFAULT_MAX_TOTAL_TOKENS, DEFAULT_COSINE_THRESHOLD, DEFAULT_RELATED_CHUNK_NUMBER, DEFAULT_MIN_RERANK_SCORE, DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, DEFAULT_MAX_ASYNC, DEFAULT_SUMMARY_MAX_TOKENS, DEFAULT_SUMMARY_LENGTH_RECOMMENDED, DEFAULT_SUMMARY_CONTEXT_SIZE, DEFAULT_SUMMARY_LANGUAGE, DEFAULT_EMBEDDING_FUNC_MAX_ASYNC, DEFAULT_EMBEDDING_BATCH_NUM, DEFAULT_OLLAMA_MODEL_NAME, DEFAULT_OLLAMA_MODEL_TAG, DEFAULT_RERANK_BINDING, DEFAULT_ENTITY_TYPES, ) # use the .env that is inside the current folder # allows to use different .env file for each lightrag instance # the OS environment variables take precedence over the .env file load_dotenv(dotenv_path=".env", override=False) ollama_server_infos = OllamaServerInfos() class DefaultRAGStorageConfig: KV_STORAGE = "JsonKVStorage" VECTOR_STORAGE = "NanoVectorDBStorage" GRAPH_STORAGE = "NetworkXStorage" DOC_STATUS_STORAGE = "JsonDocStatusStorage" def get_default_host(binding_type: str) -> str: default_hosts = { "ollama": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"), "lollms": os.getenv("LLM_BINDING_HOST", "http://localhost:9600"), "azure_openai": os.getenv("AZURE_OPENAI_ENDPOINT", "https://api.openai.com/v1"), "openai": os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"), "gemini": os.getenv( "LLM_BINDING_HOST", "https://generativelanguage.googleapis.com" ), } return default_hosts.get( binding_type, os.getenv("LLM_BINDING_HOST", "http://localhost:11434") ) # fallback to ollama if unknown def parse_args() -> 
argparse.Namespace: """ Parse command line arguments with environment variable fallback Args: is_uvicorn_mode: Whether running under uvicorn mode Returns: argparse.Namespace: Parsed arguments """ parser = argparse.ArgumentParser(description="LightRAG API Server") # Server configuration parser.add_argument( "--host", default=get_env_value("HOST", "0.0.0.0"), help="Server host (default: from env or 0.0.0.0)", ) parser.add_argument( "--port", type=int, default=get_env_value("PORT", 9621, int), help="Server port (default: from env or 9621)", ) # Directory configuration parser.add_argument( "--working-dir", default=get_env_value("WORKING_DIR", "./rag_storage"), help="Working directory for RAG storage (default: from env or ./rag_storage)", ) parser.add_argument( "--input-dir", default=get_env_value("INPUT_DIR", "./inputs"), help="Directory containing input documents (default: from env or ./inputs)", ) parser.add_argument( "--timeout", default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True), type=int, help="Timeout in seconds (useful when using slow AI). 
Use None for infinite timeout", ) # RAG configuration parser.add_argument( "--max-async", type=int, default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int), help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})", ) parser.add_argument( "--summary-max-tokens", type=int, default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int), help=f"Maximum token size for entity/relation summary(default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})", ) parser.add_argument( "--summary-context-size", type=int, default=get_env_value( "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int ), help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})", ) parser.add_argument( "--summary-length-recommended", type=int, default=get_env_value( "SUMMARY_LENGTH_RECOMMENDED", DEFAULT_SUMMARY_LENGTH_RECOMMENDED, int ), help=f"LLM Summary Context size (default: from env or {DEFAULT_SUMMARY_LENGTH_RECOMMENDED})", ) # Logging configuration parser.add_argument( "--log-level", default=get_env_value("LOG_LEVEL", "INFO"), choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Logging level (default: from env or INFO)", ) parser.add_argument( "--verbose", action="store_true", default=get_env_value("VERBOSE", False, bool), help="Enable verbose debug output(only valid for DEBUG log-level)", ) parser.add_argument( "--key", type=str, default=get_env_value("LIGHTRAG_API_KEY", None), help="API key for authentication. 
This protects lightrag server against unauthorized access", ) # Optional https parameters parser.add_argument( "--ssl", action="store_true", default=get_env_value("SSL", False, bool), help="Enable HTTPS (default: from env or False)", ) parser.add_argument( "--ssl-certfile", default=get_env_value("SSL_CERTFILE", None), help="Path to SSL certificate file (required if --ssl is enabled)", ) parser.add_argument( "--ssl-keyfile", default=get_env_value("SSL_KEYFILE", None), help="Path to SSL private key file (required if --ssl is enabled)", ) # Ollama model configuration parser.add_argument( "--simulated-model-name", type=str, default=get_env_value("OLLAMA_EMULATING_MODEL_NAME", DEFAULT_OLLAMA_MODEL_NAME), help="Name for the simulated Ollama model (default: from env or lightrag)", ) parser.add_argument( "--simulated-model-tag", type=str, default=get_env_value("OLLAMA_EMULATING_MODEL_TAG", DEFAULT_OLLAMA_MODEL_TAG), help="Tag for the simulated Ollama model (default: from env or latest)", ) # Namespace parser.add_argument( "--workspace", type=str, default=get_env_value("WORKSPACE", ""), help="Default workspace for all storage", ) # Server workers configuration parser.add_argument( "--workers", type=int, default=get_env_value("WORKERS", DEFAULT_WOKERS, int), help="Number of worker processes (default: from env or 1)", ) # LLM and embedding bindings parser.add_argument( "--llm-binding", type=str, default=get_env_value("LLM_BINDING", "ollama"), choices=[ "lollms", "ollama", "openai", "openai-ollama", "azure_openai", "aws_bedrock", "gemini", ], help="LLM binding type (default: from env or ollama)", ) parser.add_argument( "--embedding-binding", type=str, default=get_env_value("EMBEDDING_BINDING", "ollama"), choices=[ "lollms", "ollama", "openai", "azure_openai", "aws_bedrock", "jina", "gemini", ], help="Embedding binding type (default: from env or ollama)", ) parser.add_argument( "--rerank-binding", type=str, default=get_env_value("RERANK_BINDING", DEFAULT_RERANK_BINDING), 
choices=["null", "cohere", "jina", "aliyun"], help=f"Rerank binding type (default: from env or {DEFAULT_RERANK_BINDING})", ) # Document loading engine configuration parser.add_argument( "--docling", action="store_true", default=False, help="Enable DOCLING document loading engine (default: from env or DEFAULT)", ) # Conditionally add binding-specific options (Ollama, OpenAI, Azure OpenAI, Gemini) # This registers command line arguments (e.g., --openai-llm-temperature) # and reads corresponding environment variables (e.g., OPENAI_LLM_TEMPERATURE) # Determine LLM binding value consistently from command line or environment llm_binding_value = None if "--llm-binding" in sys.argv: try: idx = sys.argv.index("--llm-binding") if idx + 1 < len(sys.argv) and not sys.argv[idx + 1].startswith("-"): llm_binding_value = sys.argv[idx + 1] except IndexError: pass # Fall back to environment variable using same function as argparse default if llm_binding_value is None: llm_binding_value = get_env_value("LLM_BINDING", "ollama") # Add LLM binding options based on determined value if llm_binding_value == "ollama": OllamaLLMOptions.add_args(parser) elif llm_binding_value in ["openai", "azure_openai"]: OpenAILLMOptions.add_args(parser) elif llm_binding_value == "gemini": GeminiLLMOptions.add_args(parser) # Determine embedding binding value consistently from command line or environment embedding_binding_value = None if "--embedding-binding" in sys.argv: try: idx = sys.argv.index("--embedding-binding") if idx + 1 < len(sys.argv) and not sys.argv[idx + 1].startswith("-"): embedding_binding_value = sys.argv[idx + 1] except IndexError: pass # Fall back to environment variable using same function as argparse default if embedding_binding_value is None: embedding_binding_value = get_env_value("EMBEDDING_BINDING", "ollama") # Add embedding binding options based on determined value if embedding_binding_value == "ollama": OllamaEmbeddingOptions.add_args(parser) elif embedding_binding_value == 
"gemini": GeminiEmbeddingOptions.add_args(parser) args = parser.parse_args() # convert relative path to absolute path args.working_dir = os.path.abspath(args.working_dir) args.input_dir = os.path.abspath(args.input_dir) # Inject storage configuration from environment variables args.kv_storage = get_env_value( "LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE ) args.doc_status_storage = get_env_value( "LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE ) args.graph_storage = get_env_value( "LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE ) args.vector_storage = get_env_value( "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE ) # Get MAX_PARALLEL_INSERT from environment args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int) # Get MAX_GRAPH_NODES from environment args.max_graph_nodes = get_env_value("MAX_GRAPH_NODES", 1000, int) # Handle openai-ollama special case if args.llm_binding == "openai-ollama": args.llm_binding = "openai" args.embedding_binding = "ollama" # Ollama ctx_num args.ollama_num_ctx = get_env_value("OLLAMA_NUM_CTX", 32768, int) args.llm_binding_host = get_env_value( "LLM_BINDING_HOST", get_default_host(args.llm_binding) ) args.embedding_binding_host = get_env_value( "EMBEDDING_BINDING_HOST", get_default_host(args.embedding_binding) ) args.llm_binding_api_key = get_env_value("LLM_BINDING_API_KEY", None) args.embedding_binding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "") # Inject model configuration args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest") # EMBEDDING_MODEL defaults to None - each binding will use its own default model # e.g., OpenAI uses "text-embedding-3-small", Jina uses "jina-embeddings-v4" args.embedding_model = get_env_value("EMBEDDING_MODEL", None, special_none=True) # EMBEDDING_DIM defaults to None - each binding will use its own default dimension # Value is inherited from provider defaults via wrap_embedding_func_with_attrs 
decorator args.embedding_dim = get_env_value("EMBEDDING_DIM", None, int, special_none=True) args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool) # Inject chunk configuration args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int) args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int) # Inject LLM cache configuration args.enable_llm_cache_for_extract = get_env_value( "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool ) args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool) # Set document_loading_engine from --docling flag if args.docling: args.document_loading_engine = "DOCLING" else: args.document_loading_engine = get_env_value( "DOCUMENT_LOADING_ENGINE", "DEFAULT" ) # PDF decryption password args.pdf_decrypt_password = get_env_value("PDF_DECRYPT_PASSWORD", None) # Add environment variables that were previously read directly args.cors_origins = get_env_value("CORS_ORIGINS", "*") args.summary_language = get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE) args.entity_types = get_env_value("ENTITY_TYPES", DEFAULT_ENTITY_TYPES, list) args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*") # For JWT Auth args.auth_accounts = get_env_value("AUTH_ACCOUNTS", "") args.token_secret = get_env_value( "TOKEN_SECRET", "lightrag-jwt-default-secret-key!" 
) args.token_expire_hours = get_env_value("TOKEN_EXPIRE_HOURS", 48, float) args.guest_token_expire_hours = get_env_value("GUEST_TOKEN_EXPIRE_HOURS", 24, float) args.jwt_algorithm = get_env_value("JWT_ALGORITHM", "HS256") # Token auto-renewal configuration (sliding window expiration) args.token_auto_renew = get_env_value("TOKEN_AUTO_RENEW", True, bool) args.token_renew_threshold = get_env_value("TOKEN_RENEW_THRESHOLD", 0.5, float) # Rerank model configuration args.rerank_model = get_env_value("RERANK_MODEL", None) args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None) args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None) # Note: rerank_binding is already set by argparse, no need to override from env # Min rerank score configuration args.min_rerank_score = get_env_value( "MIN_RERANK_SCORE", DEFAULT_MIN_RERANK_SCORE, float ) # Query configuration args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int) args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int) args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int) args.max_entity_tokens = get_env_value( "MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int ) args.max_relation_tokens = get_env_value( "MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int ) args.max_total_tokens = get_env_value( "MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int ) args.cosine_threshold = get_env_value( "COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float ) args.related_chunk_number = get_env_value( "RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int ) # Add missing environment variables for health endpoint args.force_llm_summary_on_merge = get_env_value( "FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int ) args.embedding_func_max_async = get_env_value( "EMBEDDING_FUNC_MAX_ASYNC", DEFAULT_EMBEDDING_FUNC_MAX_ASYNC, int ) args.embedding_batch_num = get_env_value( "EMBEDDING_BATCH_NUM", DEFAULT_EMBEDDING_BATCH_NUM, int ) # Embedding token limit 
def update_uvicorn_mode_config():
    """Force a single worker and log a warning when more were configured.

    Reads ``global_args.workers``; when it is greater than 1 the value is
    overwritten with 1 and a warning naming the ignored value is logged.
    Values of 1 or less are left untouched.
    """
    requested_workers = global_args.workers
    if requested_workers > 1:
        global_args.workers = 1
        # Log warning directly here
        logging.warning(
            f">> Forcing workers=1 in uvicorn mode(Ignoring workers={requested_workers})"
        )


# Global configuration with lazy initialization
_global_args = None
_initialized = False


def initialize_config(args=None, force=False):
    """Initialize the module-level configuration.

    Allows explicit initialization, which is useful for programmatic usage,
    testing, or embedding LightRAG in other applications.

    Args:
        args: Pre-parsed argparse.Namespace, or None to obtain one from
            ``parse_args()``.
        force: Re-initialize even if a configuration is already stored.

    Returns:
        argparse.Namespace: The stored configuration. When already
        initialized and ``force`` is false, the previously stored object is
        returned and ``args`` is ignored.

    Example:
        # Use parsed command line arguments (default)
        initialize_config()

        # Use custom configuration programmatically
        custom_args = argparse.Namespace(
            host='localhost',
            port=8080,
            working_dir='./custom_rag',
            # ... other config
        )
        initialize_config(custom_args)
    """
    global _global_args, _initialized

    if _initialized and not force:
        return _global_args

    _global_args = args if args is not None else parse_args()
    _initialized = True
    return _global_args


def get_config():
    """Return the stored configuration, initializing it first if needed.

    Returns:
        argparse.Namespace: The configured arguments
    """
    if not _initialized:
        initialize_config()
    return _global_args


class _GlobalArgsProxy:
    """Proxy that initializes the configuration on first attribute access.

    Attribute reads and writes are forwarded to the namespace returned by
    ``get_config()``. ``__dict__`` is forwarded as well, so ``vars(proxy)``
    yields the namespace's attributes rather than the proxy's own.
    """

    def __getattribute__(self, name):
        """Forward attribute lookups to the underlying namespace.

        ``__dict__`` (what ``vars()`` asks for) resolves to the namespace's
        attribute dict. A few names are answered by the proxy object itself so
        that type checks and the overridden hooks keep working.
        """
        # Handle __dict__ access for vars() support
        if name == "__dict__":
            return vars(get_config())

        # Handle class-level attributes that should come from the proxy itself
        if name in ("__class__", "__repr__", "__getattribute__", "__setattr__"):
            return object.__getattribute__(self, name)

        # Delegate all other attribute access to the underlying namespace
        return getattr(get_config(), name)

    def __setattr__(self, name, value):
        """Set ``name`` on the underlying namespace, initializing it if needed."""
        setattr(get_config(), name, value)

    def __repr__(self):
        """Return the namespace's repr, or a placeholder before initialization.

        Deliberately does not trigger initialization: printing or logging the
        proxy must not parse the command line as a side effect.
        """
        if not _initialized:
            # A non-empty marker keeps log output distinguishable from an
            # empty value.
            return "<GlobalArgsProxy: Not initialized>"
        return repr(_global_args)


# Create proxy instance for backward compatibility
# Existing code like `from config import global_args` continues to work
# The proxy will auto-initialize on first attribute access
global_args = _GlobalArgsProxy()
def on_starting(server):
    """
    Executed when Gunicorn starts, before forking the first worker processes
    You can use this function to do more initialization tasks for all processes

    Prints a banner with the worker count and process ID, the resident memory
    of the current process when ``psutil`` can be imported, and the log file
    location. ``server`` is not used.
    """
    print("=" * 80)
    print(f"GUNICORN MASTER PROCESS: on_starting jobs for {workers} worker(s)")
    print(f"Process ID: {os.getpid()}")
    print("=" * 80)

    # Memory usage monitoring (best effort: skipped when psutil is missing)
    try:
        import psutil

        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()
        msg = (
            f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
        )
        print(msg)
    except ImportError:
        print("psutil not installed, skipping memory usage reporting")

    # Log the location of the LightRAG log file
    print(f"LightRAG log file: {log_file_path}\n")

    print("Gunicorn initialization complete, forking workers...\n")


def on_exit(server):
    """
    Executed when Gunicorn is shutting down.
    This is a good place to release shared resources.

    Prints a shutdown banner and calls ``finalize_share_data()``. ``server``
    is not used.
    """
    print("=" * 80)
    print("GUNICORN MASTER PROCESS: Shutting down")
    print(f"Process ID: {os.getpid()}")

    print("Finalizing shared storage...")
    finalize_share_data()

    print("Gunicorn shutdown complete")
    print("=" * 80)


def post_fork(server, worker):
    """
    Executed after a worker has been forked.
    This is a good place to set up worker-specific configurations.

    Configures the ``uvicorn``, ``uvicorn.access`` and ``lightrag`` loggers
    (plus every already-registered ``lightrag.*`` logger) through
    ``setup_logger`` and silences ``uvicorn.error``. The level comes from the
    module-level ``loglevel`` and falls back to ``"INFO"`` when it is unset.
    ``server`` and ``worker`` are not used.
    """
    # Set up main loggers
    log_level = loglevel.upper() if loglevel else "INFO"
    setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
    setup_logger(
        "uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
    )
    setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)

    # Set up lightrag submodule loggers.
    # Iterate over a snapshot: loggerDict is the live registry, and a logger
    # registered while we loop would otherwise raise
    # "dictionary changed size during iteration".
    for name in list(logging.root.manager.loggerDict):
        if name.startswith("lightrag."):
            setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)

    # Disable uvicorn.error logger
    uvicorn_error_logger = logging.getLogger("uvicorn.error")
    uvicorn_error_logger.handlers = []
    uvicorn_error_logger.setLevel(logging.CRITICAL)
    uvicorn_error_logger.propagate = False
class LLMConfigCache:
    """Binding-specific LLM and embedding options, computed once at startup.

    Each ``*_options`` attribute stays ``None`` unless the matching binding is
    selected in ``args``; selected bindings get the dict produced by the
    corresponding ``options_dict(args)`` call, which is also logged.
    """

    def __init__(self, args):
        """Build the option dicts for the bindings selected in ``args``.

        Args:
            args: Configuration namespace; ``llm_binding`` and
                ``embedding_binding`` decide which options are computed.
        """
        self.args = args

        # Initialize configurations based on binding conditions
        self.openai_llm_options = None
        self.gemini_llm_options = None
        self.gemini_embedding_options = None
        self.ollama_llm_options = None
        self.ollama_embedding_options = None

        # Only initialize and log OpenAI options when using OpenAI-related bindings
        if args.llm_binding in ["openai", "azure_openai"]:
            from lightrag.llm.binding_options import OpenAILLMOptions

            self.openai_llm_options = OpenAILLMOptions.options_dict(args)
            logger.info(f"OpenAI LLM Options: {self.openai_llm_options}")

        if args.llm_binding == "gemini":
            from lightrag.llm.binding_options import GeminiLLMOptions

            self.gemini_llm_options = GeminiLLMOptions.options_dict(args)
            logger.info(f"Gemini LLM Options: {self.gemini_llm_options}")

        # Only initialize and log Ollama LLM options when using Ollama LLM binding
        if args.llm_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaLLMOptions

                self.ollama_llm_options = OllamaLLMOptions.options_dict(args)
                logger.info(f"Ollama LLM Options: {self.ollama_llm_options}")
            except ImportError:
                # Fall back to an empty dict rather than failing startup
                logger.warning(
                    "OllamaLLMOptions not available, using default configuration"
                )
                self.ollama_llm_options = {}

        # Only initialize and log Ollama Embedding options when using Ollama Embedding binding
        if args.embedding_binding == "ollama":
            try:
                from lightrag.llm.binding_options import OllamaEmbeddingOptions

                self.ollama_embedding_options = OllamaEmbeddingOptions.options_dict(
                    args
                )
                logger.info(
                    f"Ollama Embedding Options: {self.ollama_embedding_options}"
                )
            except ImportError:
                logger.warning(
                    "OllamaEmbeddingOptions not available, using default configuration"
                )
                self.ollama_embedding_options = {}

        # Only initialize and log Gemini Embedding options when using Gemini Embedding binding
        if args.embedding_binding == "gemini":
            try:
                from lightrag.llm.binding_options import GeminiEmbeddingOptions

                self.gemini_embedding_options = GeminiEmbeddingOptions.options_dict(
                    args
                )
                logger.info(
                    f"Gemini Embedding Options: {self.gemini_embedding_options}"
                )
            except ImportError:
                logger.warning(
                    "GeminiEmbeddingOptions not available, using default configuration"
                )
                self.gemini_embedding_options = {}


def _print_frontend_build_commands():
    """Print the shell commands that build the WebUI frontend."""
    ASCIIColors.cyan(" cd lightrag_webui")
    ASCIIColors.cyan(" bun install --frozen-lockfile")
    ASCIIColors.cyan(" bun run build")
    ASCIIColors.cyan(" cd ..")


def check_frontend_build():
    """Check if frontend is built and optionally check if source is up-to-date

    The build is considered present when ``webui/index.html`` exists next to
    this module. When a ``lightrag_webui/src`` directory is found three levels
    up (a development checkout), the newest modification time among the source
    files and key configuration files is compared with that of ``index.html``.

    Returns:
        tuple: (assets_exist: bool, is_outdated: bool)
            - assets_exist: True if WebUI build files exist
            - is_outdated: True if source is newer than build (only in dev environment)

        Any exception raised during the freshness check is logged as a warning
        and reported as ``(True, False)`` so that startup is not affected.
    """
    webui_dir = Path(__file__).parent / "webui"
    index_html = webui_dir / "index.html"

    # 1. Check if build files exist
    if not index_html.exists():
        ASCIIColors.yellow("\n" + "=" * 80)
        ASCIIColors.yellow("WARNING: Frontend Not Built")
        ASCIIColors.yellow("=" * 80)
        ASCIIColors.yellow("The WebUI frontend has not been built yet.")
        ASCIIColors.yellow("The API server will start without the WebUI interface.")
        ASCIIColors.yellow(
            "\nTo enable WebUI, build the frontend using these commands:\n"
        )
        _print_frontend_build_commands()
        ASCIIColors.yellow("\nThen restart the service.\n")
        ASCIIColors.cyan(
            "Note: Make sure you have Bun installed. Visit https://bun.sh for installation."
        )
        ASCIIColors.yellow("=" * 80 + "\n")
        return (False, False)  # Assets don't exist, not outdated

    # 2. Check if this is a development environment (source directory exists)
    try:
        source_dir = Path(__file__).parent.parent.parent / "lightrag_webui"
        src_dir = source_dir / "src"

        # Determine if this is a development environment: source directory exists and contains src directory
        if not source_dir.exists() or not src_dir.exists():
            # Production environment, skip source code check
            logger.debug(
                "Production environment detected, skipping source freshness check"
            )
            return (True, False)  # Assets exist, not outdated (prod environment)

        # Development environment, perform source code timestamp check
        logger.debug("Development environment detected, checking source freshness")

        # Source code file extensions (files to check)
        source_extensions = {
            ".ts",
            ".tsx",
            ".js",
            ".jsx",
            ".mjs",
            ".cjs",  # TypeScript/JavaScript
            ".css",
            ".scss",
            ".sass",
            ".less",  # Style files
            ".json",
            ".jsonc",  # Configuration/data files
            ".html",
            ".htm",  # Template files
            ".md",
            ".mdx",  # Markdown
        }

        # Key configuration files (in lightrag_webui root directory).
        # Missing files are skipped below, so a misspelt name would silently
        # drop that file from the freshness check.
        key_files = [
            source_dir / "package.json",
            source_dir / "bun.lock",
            source_dir / "vite.config.ts",
            source_dir / "tsconfig.json",
            source_dir / "tailwind.config.js",
            source_dir / "index.html",
        ]

        # Get the latest modification time of source code
        latest_source_time = 0

        # Check source code files in src directory
        for file_path in src_dir.rglob("*"):
            # Only check source code files, ignore temporary files and logs
            if file_path.is_file() and file_path.suffix.lower() in source_extensions:
                latest_source_time = max(latest_source_time, file_path.stat().st_mtime)

        # Check key configuration files
        for key_file in key_files:
            if key_file.exists():
                latest_source_time = max(latest_source_time, key_file.stat().st_mtime)

        # Get build time
        build_time = index_html.stat().st_mtime

        # Compare timestamps (5 second tolerance to avoid file system time precision issues)
        if latest_source_time > build_time + 5:
            ASCIIColors.yellow("\n" + "=" * 80)
            ASCIIColors.yellow("WARNING: Frontend Source Code Has Been Updated")
            ASCIIColors.yellow("=" * 80)
            ASCIIColors.yellow(
                "The frontend source code is newer than the current build."
            )
            ASCIIColors.yellow(
                "This might happen after 'git pull' or manual code changes.\n"
            )
            ASCIIColors.cyan(
                "Recommended: Rebuild the frontend to use the latest changes:"
            )
            _print_frontend_build_commands()
            ASCIIColors.yellow("\nThe server will continue with the current build.")
            ASCIIColors.yellow("=" * 80 + "\n")
            return (True, True)  # Assets exist, outdated

        logger.info("Frontend build is up-to-date")
        return (True, False)  # Assets exist, up-to-date

    except Exception as e:
        # If check fails, log warning but don't affect startup
        logger.warning(f"Failed to check frontend source freshness: {e}")
        return (True, False)  # Assume assets exist and up-to-date on error
args.embedding_binding_host = get_default_host(args.embedding_binding) # Add SSL validation if args.ssl: if not args.ssl_certfile or not args.ssl_keyfile: raise Exception( "SSL certificate and key files must be provided when SSL is enabled" ) if not os.path.exists(args.ssl_certfile): raise Exception(f"SSL certificate file not found: {args.ssl_certfile}") if not os.path.exists(args.ssl_keyfile): raise Exception(f"SSL key file not found: {args.ssl_keyfile}") # Check if API key is provided either through env var or args api_key = os.getenv("LIGHTRAG_API_KEY") or args.key # Initialize document manager with workspace support for data isolation doc_manager = DocumentManager(args.input_dir, workspace=args.workspace) @asynccontextmanager async def lifespan(app: FastAPI): """Lifespan context manager for startup and shutdown events""" # Store background tasks app.state.background_tasks = set() try: # Initialize database connections # Note: initialize_storages() now auto-initializes pipeline_status for rag.workspace await rag.initialize_storages() # Data migration regardless of storage implementation await rag.check_and_migrate_data() ASCIIColors.green("\nServer is ready to accept connections! 
🚀\n") yield finally: # Clean up database connections await rag.finalize_storages() if "LIGHTRAG_GUNICORN_MODE" not in os.environ: # Only perform cleanup in Uvicorn single-process mode logger.debug("Unvicorn Mode: finalizing shared storage...") finalize_share_data() else: # In Gunicorn mode with preload_app=True, cleanup is handled by on_exit hooks logger.debug( "Gunicorn Mode: postpone shared storage finalization to master process" ) # Initialize FastAPI base_description = ( "Providing API for LightRAG core, Web UI and Ollama Model Emulation" ) swagger_description = ( base_description + (" (API-Key Enabled)" if api_key else "") + "\n\n[View ReDoc documentation](/redoc)" ) app_kwargs = { "title": "LightRAG Server API", "description": swagger_description, "version": __api_version__, "openapi_url": "/openapi.json", # Explicitly set OpenAPI schema URL "docs_url": None, # Disable default docs, we'll create custom endpoint "redoc_url": "/redoc", # Explicitly set redoc URL "lifespan": lifespan, } # Configure Swagger UI parameters # Enable persistAuthorization and tryItOutEnabled for better user experience app_kwargs["swagger_ui_parameters"] = { "persistAuthorization": True, "tryItOutEnabled": True, } app = FastAPI(**app_kwargs) # Add custom validation error handler for /query/data endpoint @app.exception_handler(RequestValidationError) async def validation_exception_handler( request: Request, exc: RequestValidationError ): # Check if this is a request to /query/data endpoint if request.url.path.endswith("/query/data"): # Extract error details error_details = [] for error in exc.errors(): field_path = " -> ".join(str(loc) for loc in error["loc"]) error_details.append(f"{field_path}: {error['msg']}") error_message = "; ".join(error_details) # Return in the expected format for /query/data return JSONResponse( status_code=400, content={ "status": "failure", "message": f"Validation error: {error_message}", "data": {}, "metadata": {}, }, ) else: # For other endpoints, return 
the default FastAPI validation error return JSONResponse(status_code=422, content={"detail": exc.errors()}) def get_cors_origins(): """Get allowed origins from global_args Returns a list of allowed origins, defaults to ["*"] if not set """ origins_str = global_args.cors_origins if origins_str == "*": return ["*"] return [origin.strip() for origin in origins_str.split(",")] # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=get_cors_origins(), allow_credentials=True, allow_methods=["*"], allow_headers=["*"], expose_headers=[ "X-New-Token" ], # Expose token renewal header for cross-origin requests ) # Create combined auth dependency for all endpoints combined_auth = get_combined_auth_dependency(api_key) def get_workspace_from_request(request: Request) -> str | None: """ Extract workspace from HTTP request header or use default. This enables multi-workspace API support by checking the custom 'LIGHTRAG-WORKSPACE' header. If not present, falls back to the server's default workspace configuration. Args: request: FastAPI Request object Returns: Workspace identifier (may be empty string for global namespace) """ # Check custom header first workspace = request.headers.get("LIGHTRAG-WORKSPACE", "").strip() if not workspace: workspace = None else: sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", workspace) if sanitized != workspace: logger.warning( f"Workspace header '{workspace}' contains invalid characters. " f"Sanitized to '{sanitized}'." 
def create_optimized_openai_llm_func(
    config_cache: LLMConfigCache, args, llm_timeout: int
):
    """Create optimized OpenAI LLM function with pre-processed configuration

    Args:
        config_cache: Source of the cached ``openai_llm_options`` dict.
        args: Configuration namespace; ``llm_model``, ``llm_binding_host`` and
            ``llm_binding_api_key`` are read on every call.
        llm_timeout: Value stored under ``kwargs["timeout"]`` on every call.

    Returns:
        An async completion function forwarding to ``openai_complete_if_cache``.
    """

    async def optimized_openai_alike_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        from lightrag.llm.openai import openai_complete_if_cache

        # `keyword_extraction` is a named parameter, so it can never appear in
        # **kwargs; use the parameter value directly.
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []

        # Use pre-processed configuration to avoid repeated parsing
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            # Cached options win over per-call kwargs with the same key
            kwargs.update(config_cache.openai_llm_options)

        return await openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=args.llm_binding_api_key,
            **kwargs,
        )

    return optimized_openai_alike_model_complete


def create_optimized_azure_openai_llm_func(
    config_cache: LLMConfigCache, args, llm_timeout: int
):
    """Create optimized Azure OpenAI LLM function with pre-processed configuration

    Same contract as the OpenAI factory, except that the API key is taken from
    the ``AZURE_OPENAI_API_KEY`` environment variable when set (falling back to
    ``args.llm_binding_api_key``) and the API version from
    ``AZURE_OPENAI_API_VERSION`` (default ``"2024-08-01-preview"``).
    """

    async def optimized_azure_openai_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        from lightrag.llm.azure_openai import azure_openai_complete_if_cache

        # `keyword_extraction` is a named parameter, so it can never appear in
        # **kwargs; use the parameter value directly.
        if keyword_extraction:
            kwargs["response_format"] = GPTKeywordExtractionFormat
        if history_messages is None:
            history_messages = []

        # Use pre-processed configuration to avoid repeated parsing
        kwargs["timeout"] = llm_timeout
        if config_cache.openai_llm_options:
            kwargs.update(config_cache.openai_llm_options)

        return await azure_openai_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            base_url=args.llm_binding_host,
            api_key=os.getenv("AZURE_OPENAI_API_KEY", args.llm_binding_api_key),
            api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
            **kwargs,
        )

    return optimized_azure_openai_model_complete


def create_optimized_gemini_llm_func(
    config_cache: LLMConfigCache, args, llm_timeout: int
):
    """Create optimized Gemini LLM function with cached configuration

    The cached ``gemini_llm_options`` are passed as ``generation_config``
    (copied into a fresh dict) unless the caller supplies its own
    ``generation_config``. ``keyword_extraction`` is forwarded unchanged.
    """

    async def optimized_gemini_model_complete(
        prompt,
        system_prompt=None,
        history_messages=None,
        keyword_extraction=False,
        **kwargs,
    ) -> str:
        from lightrag.llm.gemini import gemini_complete_if_cache

        if history_messages is None:
            history_messages = []

        # Use pre-processed configuration to avoid repeated parsing
        kwargs["timeout"] = llm_timeout
        if (
            config_cache.gemini_llm_options is not None
            and "generation_config" not in kwargs
        ):
            kwargs["generation_config"] = dict(config_cache.gemini_llm_options)

        return await gemini_complete_if_cache(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=args.llm_binding_api_key,
            base_url=args.llm_binding_host,
            keyword_extraction=keyword_extraction,
            **kwargs,
        )

    return optimized_gemini_model_complete


def create_llm_model_func(binding: str):
    """
    Create LLM model function based on binding type.
    Uses optimized functions for OpenAI bindings and lazy import for others.

    Any binding other than "lollms", "ollama", "aws_bedrock", "azure_openai"
    and "gemini" is treated as OpenAI-compatible.

    Raises:
        Exception: When the binding's module cannot be imported; the original
            ImportError is attached as the cause.
    """
    try:
        if binding == "lollms":
            from lightrag.llm.lollms import lollms_model_complete

            return lollms_model_complete
        elif binding == "ollama":
            from lightrag.llm.ollama import ollama_model_complete

            return ollama_model_complete
        elif binding == "aws_bedrock":
            return bedrock_model_complete  # Already defined locally
        elif binding == "azure_openai":
            # Use optimized function with pre-processed configuration
            return create_optimized_azure_openai_llm_func(
                config_cache, args, llm_timeout
            )
        elif binding == "gemini":
            return create_optimized_gemini_llm_func(config_cache, args, llm_timeout)
        else:  # openai and compatible
            # Use optimized function with pre-processed configuration
            return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
    except ImportError as e:
        raise Exception(f"Failed to import {binding} LLM binding: {e}") from e


def create_llm_model_kwargs(binding: str, args, llm_timeout: int) -> dict:
    """
    Create LLM model kwargs based on binding type.
    Uses lazy import for binding-specific options.

    Returns:
        For "lollms" and "ollama": a dict with ``host``, ``timeout``,
        ``options`` and ``api_key``. For every other binding: an empty dict.

    Raises:
        Exception: When the options module cannot be imported; the original
            ImportError is attached as the cause.
    """
    if binding in ["lollms", "ollama"]:
        try:
            from lightrag.llm.binding_options import OllamaLLMOptions

            return {
                "host": args.llm_binding_host,
                "timeout": llm_timeout,
                "options": OllamaLLMOptions.options_dict(args),
                "api_key": args.llm_binding_api_key,
            }
        except ImportError as e:
            raise Exception(f"Failed to import {binding} options: {e}") from e
    return {}
Returns a properly configured EmbeddingFunc instance Configuration Rules: - When EMBEDDING_MODEL is not set: Uses provider's default model and dimension (e.g., jina-embeddings-v4 with 2048 dims, text-embedding-3-small with 1536 dims) - When EMBEDDING_MODEL is set to a custom model: User MUST also set EMBEDDING_DIM to match the custom model's dimension (e.g., for jina-embeddings-v3, set EMBEDDING_DIM=1024) Note: The embedding_dim parameter is automatically injected by EmbeddingFunc wrapper when send_dimensions=True (enabled for Jina and Gemini bindings). This wrapper calls the underlying provider function directly (.func) to avoid double-wrapping, so we must explicitly pass embedding_dim to the provider's underlying function. """ # Step 1: Import provider function and extract default attributes provider_func = None provider_max_token_size = None provider_embedding_dim = None try: if binding == "openai": from lightrag.llm.openai import openai_embed provider_func = openai_embed elif binding == "ollama": from lightrag.llm.ollama import ollama_embed provider_func = ollama_embed elif binding == "gemini": from lightrag.llm.gemini import gemini_embed provider_func = gemini_embed elif binding == "jina": from lightrag.llm.jina import jina_embed provider_func = jina_embed elif binding == "azure_openai": from lightrag.llm.azure_openai import azure_openai_embed provider_func = azure_openai_embed elif binding == "aws_bedrock": from lightrag.llm.bedrock import bedrock_embed provider_func = bedrock_embed elif binding == "lollms": from lightrag.llm.lollms import lollms_embed provider_func = lollms_embed # Extract attributes if provider is an EmbeddingFunc if provider_func and isinstance(provider_func, EmbeddingFunc): provider_max_token_size = provider_func.max_token_size provider_embedding_dim = provider_func.embedding_dim logger.debug( f"Extracted from {binding} provider: " f"max_token_size={provider_max_token_size}, " f"embedding_dim={provider_embedding_dim}" ) except ImportError 
as e: logger.warning(f"Could not import provider function for {binding}: {e}") # Step 2: Apply priority (user config > provider default) # For max_token_size: explicit env var > provider default > None final_max_token_size = args.embedding_token_limit or provider_max_token_size # For embedding_dim: user config (always has value) takes priority # Only use provider default if user config is explicitly None (which shouldn't happen) final_embedding_dim = ( args.embedding_dim if args.embedding_dim else provider_embedding_dim ) # Step 3: Create optimized embedding function (calls underlying function directly) # Note: When model is None, each binding will use its own default model async def optimized_embedding_function(texts, embedding_dim=None): try: if binding == "lollms": from lightrag.llm.lollms import lollms_embed # Get real function, skip EmbeddingFunc wrapper if present actual_func = ( lollms_embed.func if isinstance(lollms_embed, EmbeddingFunc) else lollms_embed ) # lollms embed_model is not used (server uses configured vectorizer) # Only pass base_url and api_key return await actual_func(texts, base_url=host, api_key=api_key) elif binding == "ollama": from lightrag.llm.ollama import ollama_embed # Get real function, skip EmbeddingFunc wrapper if present actual_func = ( ollama_embed.func if isinstance(ollama_embed, EmbeddingFunc) else ollama_embed ) # Use pre-processed configuration if available if config_cache.ollama_embedding_options is not None: ollama_options = config_cache.ollama_embedding_options else: from lightrag.llm.binding_options import OllamaEmbeddingOptions ollama_options = OllamaEmbeddingOptions.options_dict(args) # Pass embed_model only if provided, let function use its default (bge-m3:latest) kwargs = { "texts": texts, "host": host, "api_key": api_key, "options": ollama_options, } if model: kwargs["embed_model"] = model return await actual_func(**kwargs) elif binding == "azure_openai": from lightrag.llm.azure_openai import azure_openai_embed 
actual_func = ( azure_openai_embed.func if isinstance(azure_openai_embed, EmbeddingFunc) else azure_openai_embed ) # Pass model only if provided, let function use its default otherwise kwargs = { "texts": texts, "api_key": api_key, "embedding_dim": embedding_dim, } if model: kwargs["model"] = model return await actual_func(**kwargs) elif binding == "aws_bedrock": from lightrag.llm.bedrock import bedrock_embed actual_func = ( bedrock_embed.func if isinstance(bedrock_embed, EmbeddingFunc) else bedrock_embed ) # Pass model only if provided, let function use its default otherwise kwargs = {"texts": texts} if model: kwargs["model"] = model return await actual_func(**kwargs) elif binding == "jina": from lightrag.llm.jina import jina_embed actual_func = ( jina_embed.func if isinstance(jina_embed, EmbeddingFunc) else jina_embed ) # Pass model only if provided, let function use its default (jina-embeddings-v4) kwargs = { "texts": texts, "embedding_dim": embedding_dim, "base_url": host, "api_key": api_key, } if model: kwargs["model"] = model return await actual_func(**kwargs) elif binding == "gemini": from lightrag.llm.gemini import gemini_embed actual_func = ( gemini_embed.func if isinstance(gemini_embed, EmbeddingFunc) else gemini_embed ) # Use pre-processed configuration if available if config_cache.gemini_embedding_options is not None: gemini_options = config_cache.gemini_embedding_options else: from lightrag.llm.binding_options import GeminiEmbeddingOptions gemini_options = GeminiEmbeddingOptions.options_dict(args) # Pass model only if provided, let function use its default (gemini-embedding-001) kwargs = { "texts": texts, "base_url": host, "api_key": api_key, "embedding_dim": embedding_dim, "task_type": gemini_options.get( "task_type", "RETRIEVAL_DOCUMENT" ), } if model: kwargs["model"] = model return await actual_func(**kwargs) else: # openai and compatible from lightrag.llm.openai import openai_embed actual_func = ( openai_embed.func if isinstance(openai_embed, 
EmbeddingFunc) else openai_embed ) # Pass model only if provided, let function use its default (text-embedding-3-small) kwargs = { "texts": texts, "base_url": host, "api_key": api_key, "embedding_dim": embedding_dim, } if model: kwargs["model"] = model return await actual_func(**kwargs) except ImportError as e: raise Exception(f"Failed to import {binding} embedding: {e}") # Step 4: Wrap in EmbeddingFunc and return embedding_func_instance = EmbeddingFunc( embedding_dim=final_embedding_dim, func=optimized_embedding_function, max_token_size=final_max_token_size, send_dimensions=False, # Will be set later based on binding requirements model_name=model, ) # Log final embedding configuration logger.info( f"Embedding config: binding={binding} model={model} " f"embedding_dim={final_embedding_dim} max_token_size={final_max_token_size}" ) return embedding_func_instance llm_timeout = get_env_value("LLM_TIMEOUT", DEFAULT_LLM_TIMEOUT, int) embedding_timeout = get_env_value( "EMBEDDING_TIMEOUT", DEFAULT_EMBEDDING_TIMEOUT, int ) async def bedrock_model_complete( prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs, ) -> str: # Lazy import from lightrag.llm.bedrock import bedrock_complete_if_cache keyword_extraction = kwargs.pop("keyword_extraction", None) if keyword_extraction: kwargs["response_format"] = GPTKeywordExtractionFormat if history_messages is None: history_messages = [] # Use global temperature for Bedrock kwargs["temperature"] = get_env_value("BEDROCK_LLM_TEMPERATURE", 1.0, float) return await bedrock_complete_if_cache( args.llm_model, prompt, system_prompt=system_prompt, history_messages=history_messages, **kwargs, ) # Create embedding function with optimized configuration and max_token_size inheritance import inspect # Create the EmbeddingFunc instance (now returns complete EmbeddingFunc with max_token_size) embedding_func = create_optimized_embedding_function( config_cache=config_cache, binding=args.embedding_binding, 
model=args.embedding_model, host=args.embedding_binding_host, api_key=args.embedding_binding_api_key, args=args, ) # Get embedding_send_dim from centralized configuration embedding_send_dim = args.embedding_send_dim # Check if the underlying function signature has embedding_dim parameter sig = inspect.signature(embedding_func.func) has_embedding_dim_param = "embedding_dim" in sig.parameters # Determine send_dimensions value based on binding type # Jina and Gemini REQUIRE dimension parameter (forced to True) # OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable if args.embedding_binding in ["jina", "gemini"]: # Jina and Gemini APIs require dimension parameter - always send it send_dimensions = has_embedding_dim_param dimension_control = f"forced by {args.embedding_binding.title()} API" else: # For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting send_dimensions = embedding_send_dim and has_embedding_dim_param if send_dimensions or not embedding_send_dim: dimension_control = "by env var" else: dimension_control = "by not hasparam" # Set send_dimensions on the EmbeddingFunc instance embedding_func.send_dimensions = send_dimensions logger.info( f"Send embedding dimension: {send_dimensions} {dimension_control} " f"(dimensions={embedding_func.embedding_dim}, has_param={has_embedding_dim_param}, " f"binding={args.embedding_binding})" ) # Log max_token_size source if embedding_func.max_token_size: source = ( "env variable" if args.embedding_token_limit else f"{args.embedding_binding} provider default" ) logger.info( f"Embedding max_token_size: {embedding_func.max_token_size} (from {source})" ) else: logger.info( "Embedding max_token_size: None (Embedding token limit is disabled)." 
) # Configure rerank function based on args.rerank_bindingparameter rerank_model_func = None if args.rerank_binding != "null": from lightrag.rerank import cohere_rerank, jina_rerank, ali_rerank # Map rerank binding to corresponding function rerank_functions = { "cohere": cohere_rerank, "jina": jina_rerank, "aliyun": ali_rerank, } # Select the appropriate rerank function based on binding selected_rerank_func = rerank_functions.get(args.rerank_binding) if not selected_rerank_func: logger.error(f"Unsupported rerank binding: {args.rerank_binding}") raise ValueError(f"Unsupported rerank binding: {args.rerank_binding}") # Get default values from selected_rerank_func if args values are None if args.rerank_model is None or args.rerank_binding_host is None: sig = inspect.signature(selected_rerank_func) # Set default model if args.rerank_model is None if args.rerank_model is None and "model" in sig.parameters: default_model = sig.parameters["model"].default if default_model != inspect.Parameter.empty: args.rerank_model = default_model # Set default base_url if args.rerank_binding_host is None if args.rerank_binding_host is None and "base_url" in sig.parameters: default_base_url = sig.parameters["base_url"].default if default_base_url != inspect.Parameter.empty: args.rerank_binding_host = default_base_url async def server_rerank_func( query: str, documents: list, top_n: int = None, extra_body: dict = None ): """Server rerank function with configuration from environment variables""" # Prepare kwargs for rerank function kwargs = { "query": query, "documents": documents, "top_n": top_n, "api_key": args.rerank_binding_api_key, "model": args.rerank_model, "base_url": args.rerank_binding_host, } # Add Cohere-specific parameters if using cohere binding if args.rerank_binding == "cohere": # Enable chunking if configured (useful for models with token limits like ColBERT) kwargs["enable_chunking"] = ( os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true" ) 
kwargs["max_tokens_per_doc"] = int( os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096") ) return await selected_rerank_func(**kwargs, extra_body=extra_body) rerank_model_func = server_rerank_func logger.info( f"Reranking is enabled: {args.rerank_model or 'default model'} using {args.rerank_binding} provider" ) else: logger.info("Reranking is disabled") # Create ollama_server_infos from command line arguments from lightrag.api.config import OllamaServerInfos ollama_server_infos = OllamaServerInfos( name=args.simulated_model_name, tag=args.simulated_model_tag ) # Initialize RAG with unified configuration try: rag = LightRAG( working_dir=args.working_dir, workspace=args.workspace, llm_model_func=create_llm_model_func(args.llm_binding), llm_model_name=args.llm_model, llm_model_max_async=args.max_async, summary_max_tokens=args.summary_max_tokens, summary_context_size=args.summary_context_size, chunk_token_size=int(args.chunk_size), chunk_overlap_token_size=int(args.chunk_overlap_size), llm_model_kwargs=create_llm_model_kwargs( args.llm_binding, args, llm_timeout ), embedding_func=embedding_func, default_llm_timeout=llm_timeout, default_embedding_timeout=embedding_timeout, kv_storage=args.kv_storage, graph_storage=args.graph_storage, vector_storage=args.vector_storage, doc_status_storage=args.doc_status_storage, vector_db_storage_cls_kwargs={ "cosine_better_than_threshold": args.cosine_threshold }, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, rerank_model_func=rerank_model_func, max_parallel_insert=args.max_parallel_insert, max_graph_nodes=args.max_graph_nodes, addon_params={ "language": args.summary_language, "entity_types": args.entity_types, }, ollama_server_infos=ollama_server_infos, ) except Exception as e: logger.error(f"Failed to initialize LightRAG: {e}") raise # Add routes app.include_router( create_document_routes( rag, doc_manager, api_key, ) ) app.include_router(create_query_routes(rag, api_key, 
args.top_k)) app.include_router(create_graph_routes(rag, api_key)) # Add Ollama API routes ollama_api = OllamaAPI(rag, top_k=args.top_k, api_key=api_key) app.include_router(ollama_api.router, prefix="/api") # Custom Swagger UI endpoint for offline support @app.get("/docs", include_in_schema=False) async def custom_swagger_ui_html(): """Custom Swagger UI HTML with local static files""" return get_swagger_ui_html( openapi_url=app.openapi_url, title=app.title + " - Swagger UI", oauth2_redirect_url="/docs/oauth2-redirect", swagger_js_url="/static/swagger-ui/swagger-ui-bundle.js", swagger_css_url="/static/swagger-ui/swagger-ui.css", swagger_favicon_url="/static/swagger-ui/favicon-32x32.png", swagger_ui_parameters=app.swagger_ui_parameters, ) @app.get("/docs/oauth2-redirect", include_in_schema=False) async def swagger_ui_redirect(): """OAuth2 redirect for Swagger UI""" return get_swagger_ui_oauth2_redirect_html() @app.get("/") async def redirect_to_webui(): """Redirect root path based on WebUI availability""" if webui_assets_exist: return RedirectResponse(url="/webui") else: return RedirectResponse(url="/docs") @app.get("/auth-status") async def get_auth_status(): """Get authentication status and guest token if auth is not configured""" if not auth_handler.accounts: # Authentication not configured, return guest token guest_token = auth_handler.create_token( username="guest", role="guest", metadata={"auth_mode": "disabled"} ) return { "auth_configured": False, "access_token": guest_token, "token_type": "bearer", "auth_mode": "disabled", "message": "Authentication is disabled. 
Using guest access.", "core_version": core_version, "api_version": api_version_display, "webui_title": webui_title, "webui_description": webui_description, } return { "auth_configured": True, "auth_mode": "enabled", "core_version": core_version, "api_version": api_version_display, "webui_title": webui_title, "webui_description": webui_description, } @app.post("/login") async def login(form_data: OAuth2PasswordRequestForm = Depends()): if not auth_handler.accounts: # Authentication not configured, return guest token guest_token = auth_handler.create_token( username="guest", role="guest", metadata={"auth_mode": "disabled"} ) return { "access_token": guest_token, "token_type": "bearer", "auth_mode": "disabled", "message": "Authentication is disabled. Using guest access.", "core_version": core_version, "api_version": api_version_display, "webui_title": webui_title, "webui_description": webui_description, } username = form_data.username if auth_handler.accounts.get(username) != form_data.password: raise HTTPException(status_code=401, detail="Incorrect credentials") # Regular user login user_token = auth_handler.create_token( username=username, role="user", metadata={"auth_mode": "enabled"} ) return { "access_token": user_token, "token_type": "bearer", "auth_mode": "enabled", "core_version": core_version, "api_version": api_version_display, "webui_title": webui_title, "webui_description": webui_description, } @app.get( "/health", dependencies=[Depends(combined_auth)], summary="Get system health and configuration status", description="Returns comprehensive system status including WebUI availability, configuration, and operational metrics", response_description="System health status with configuration details", responses={ 200: { "description": "Successful response with system status", "content": { "application/json": { "example": { "status": "healthy", "webui_available": True, "working_directory": "/path/to/working/dir", "input_directory": "/path/to/input/dir", 
"configuration": { "llm_binding": "openai", "llm_model": "gpt-4", "embedding_binding": "openai", "embedding_model": "text-embedding-ada-002", "workspace": "default", }, "auth_mode": "enabled", "pipeline_busy": False, "core_version": "0.0.1", "api_version": "0.0.1", } } }, } }, ) async def get_status(request: Request): """Get current system status including WebUI availability""" try: workspace = get_workspace_from_request(request) default_workspace = get_default_workspace() if workspace is None: workspace = default_workspace pipeline_status = await get_namespace_data( "pipeline_status", workspace=workspace ) if not auth_configured: auth_mode = "disabled" else: auth_mode = "enabled" # Cleanup expired keyed locks and get status keyed_lock_info = cleanup_keyed_lock() return { "status": "healthy", "webui_available": webui_assets_exist, "working_directory": str(args.working_dir), "input_directory": str(args.input_dir), "configuration": { # LLM configuration binding/host address (if applicable)/model (if applicable) "llm_binding": args.llm_binding, "llm_binding_host": args.llm_binding_host, "llm_model": args.llm_model, # embedding model configuration binding/host address (if applicable)/model (if applicable) "embedding_binding": args.embedding_binding, "embedding_binding_host": args.embedding_binding_host, "embedding_model": args.embedding_model, "summary_max_tokens": args.summary_max_tokens, "summary_context_size": args.summary_context_size, "kv_storage": args.kv_storage, "doc_status_storage": args.doc_status_storage, "graph_storage": args.graph_storage, "vector_storage": args.vector_storage, "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract, "enable_llm_cache": args.enable_llm_cache, "workspace": default_workspace, "max_graph_nodes": args.max_graph_nodes, # Rerank configuration "enable_rerank": rerank_model_func is not None, "rerank_binding": args.rerank_binding, "rerank_model": args.rerank_model if rerank_model_func else None, "rerank_binding_host": 
args.rerank_binding_host if rerank_model_func else None, # Environment variable status (requested configuration) "summary_language": args.summary_language, "force_llm_summary_on_merge": args.force_llm_summary_on_merge, "max_parallel_insert": args.max_parallel_insert, "cosine_threshold": args.cosine_threshold, "min_rerank_score": args.min_rerank_score, "related_chunk_number": args.related_chunk_number, "max_async": args.max_async, "embedding_func_max_async": args.embedding_func_max_async, "embedding_batch_num": args.embedding_batch_num, }, "auth_mode": auth_mode, "pipeline_busy": pipeline_status.get("busy", False), "keyed_locks": keyed_lock_info, "core_version": core_version, "api_version": api_version_display, "webui_title": webui_title, "webui_description": webui_description, } except Exception as e: logger.error(f"Error getting health status: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) # Custom StaticFiles class for smart caching class SmartStaticFiles(StaticFiles): # Renamed from NoCacheStaticFiles async def get_response(self, path: str, scope): response = await super().get_response(path, scope) is_html = path.endswith(".html") or response.media_type == "text/html" if is_html: response.headers["Cache-Control"] = ( "no-cache, no-store, must-revalidate" ) response.headers["Pragma"] = "no-cache" response.headers["Expires"] = "0" elif ( "/assets/" in path ): # Assets (JS, CSS, images, fonts) generated by Vite with hash in filename response.headers["Cache-Control"] = ( "public, max-age=31536000, immutable" ) # Add other rules here if needed for non-HTML, non-asset files # Ensure correct Content-Type if path.endswith(".js"): response.headers["Content-Type"] = "application/javascript" elif path.endswith(".css"): response.headers["Content-Type"] = "text/css" return response # Mount Swagger UI static files for offline support swagger_static_dir = Path(__file__).parent / "static" / "swagger-ui" if swagger_static_dir.exists(): app.mount( 
"/static/swagger-ui", StaticFiles(directory=swagger_static_dir), name="swagger-ui-static", ) # Conditionally mount WebUI only if assets exist if webui_assets_exist: static_dir = Path(__file__).parent / "webui" static_dir.mkdir(exist_ok=True) app.mount( "/webui", SmartStaticFiles( directory=static_dir, html=True, check_dir=True ), # Use SmartStaticFiles name="webui", ) logger.info("WebUI assets mounted at /webui") else: logger.info("WebUI assets not available, /webui route not mounted") # Add redirect for /webui when assets are not available @app.get("/webui") @app.get("/webui/") async def webui_redirect_to_docs(): """Redirect /webui to /docs when WebUI is not available""" return RedirectResponse(url="/docs") return app def get_application(args=None): """Factory function for creating the FastAPI application""" if args is None: args = global_args return create_app(args) def configure_logging(): """Configure logging for uvicorn startup""" # Reset any existing handlers to ensure clean configuration for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]: logger = logging.getLogger(logger_name) logger.handlers = [] logger.filters = [] # Get log directory path from environment variable log_dir = os.getenv("LOG_DIR", os.getcwd()) log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME)) print(f"\nLightRAG log file: {log_file_path}\n") os.makedirs(os.path.dirname(log_dir), exist_ok=True) # Get log file max size and backup count from environment variables log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int) log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int) logging.config.dictConfig( { "version": 1, "disable_existing_loggers": False, "formatters": { "default": { "format": "%(levelname)s: %(message)s", }, "detailed": { "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", }, }, "handlers": { "console": { "formatter": "default", "class": "logging.StreamHandler", 
"stream": "ext://sys.stderr", }, "file": { "formatter": "detailed", "class": "logging.handlers.RotatingFileHandler", "filename": log_file_path, "maxBytes": log_max_bytes, "backupCount": log_backup_count, "encoding": "utf-8", }, }, "loggers": { # Configure all uvicorn related loggers "uvicorn": { "handlers": ["console", "file"], "level": "INFO", "propagate": False, }, "uvicorn.access": { "handlers": ["console", "file"], "level": "INFO", "propagate": False, "filters": ["path_filter"], }, "uvicorn.error": { "handlers": ["console", "file"], "level": "INFO", "propagate": False, }, "lightrag": { "handlers": ["console", "file"], "level": "INFO", "propagate": False, "filters": ["path_filter"], }, }, "filters": { "path_filter": { "()": "lightrag.utils.LightragPathFilter", }, }, } ) def check_and_install_dependencies(): """Check and install required dependencies""" required_packages = [ "uvicorn", "tiktoken", "fastapi", # Add other required packages here ] for package in required_packages: if not pm.is_installed(package): print(f"Installing {package}...") pm.install(package) print(f"{package} installed successfully") def main(): # On Windows, ProactorEventLoop (default since Python 3.8) has known # race conditions with uvicorn's socket binding that can cause the server # to report it's running while the port is never actually bound. # Using SelectorEventLoop resolves this issue. 
# See: https://github.com/HKUDS/LightRAG/issues/2438
    if sys.platform == "win32":
        import asyncio

        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    # Explicitly initialize configuration for clarity
    # (The proxy will auto-initialize anyway, but this makes intent clear)
    from .config import initialize_config

    initialize_config()

    # Check if running under Gunicorn
    if "GUNICORN_CMD_ARGS" in os.environ:
        # If started with Gunicorn, return directly as Gunicorn will call get_application
        print("Running under Gunicorn - worker management handled by Gunicorn")
        return

    # Check .env file; abort startup with exit status 1 when the check fails
    if not check_env_file():
        sys.exit(1)

    # Check and install dependencies
    check_and_install_dependencies()

    from multiprocessing import freeze_support

    freeze_support()

    # Configure logging before parsing args
    configure_logging()
    update_uvicorn_mode_config()
    display_splash_screen(global_args)

    # Note: Signal handlers are NOT registered here because:
    # - Uvicorn has built-in signal handling that properly calls lifespan shutdown
    # - Custom signal handlers can interfere with uvicorn's graceful shutdown
    # - Cleanup is handled by the lifespan context manager's finally block

    # Create application instance directly instead of using factory function
    app = create_app(global_args)

    # Start Uvicorn in single process mode
    uvicorn_config = {
        "app": app,  # Pass application instance directly instead of string path
        "host": global_args.host,
        "port": global_args.port,
        "log_config": None,  # Disable default config
    }

    # TLS settings are passed to Uvicorn only when SSL is enabled in the arguments
    if global_args.ssl:
        uvicorn_config.update(
            {
                "ssl_certfile": global_args.ssl_certfile,
                "ssl_keyfile": global_args.ssl_keyfile,
            }
        )

    print(
        f"Starting Uvicorn server in single-process mode on {global_args.host}:{global_args.port}"
    )
    uvicorn.run(**uvicorn_config)


if __name__ == "__main__":
    main()


================================================
FILE: lightrag/api/routers/__init__.py
================================================
"""
This module contains all the routers for the LightRAG API.
"""

from .document_routes import router as document_router
from .query_routes import router as query_router
from .graph_routes import router as graph_router
from .ollama_api import OllamaAPI

__all__ = ["document_router", "query_router", "graph_router", "OllamaAPI"]


================================================
FILE: lightrag/api/routers/document_routes.py
================================================
"""
This module contains all document-related routes for the LightRAG API.
"""

import asyncio
from functools import lru_cache
from lightrag.utils import logger, get_pinyin_sort_key
import aiofiles
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Any, Literal
from io import BytesIO
from fastapi import (
    APIRouter,
    BackgroundTasks,
    Depends,
    File,
    HTTPException,
    UploadFile,
)
from pydantic import BaseModel, ConfigDict, Field, field_validator

from lightrag import LightRAG
from lightrag.base import DeletionResult, DocProcessingStatus, DocStatus
from lightrag.utils import (
    generate_track_id,
    compute_mdhash_id,
    sanitize_text_for_encoding,
)
from lightrag.api.utils_api import get_combined_auth_dependency
from ..config import global_args


@lru_cache(maxsize=1)
def _is_docling_available() -> bool:
    """Check if docling is available (cached check).

    This function uses lru_cache to avoid repeated import attempts.
    The result is cached after the first call.
Returns: bool: True if docling is available, False otherwise """ try: import docling # noqa: F401 # type: ignore[import-not-found] return True except ImportError: return False # Function to format datetime to ISO format string with timezone information def format_datetime(dt: Any) -> Optional[str]: """Format datetime to ISO format string with timezone information Args: dt: Datetime object, string, or None Returns: ISO format string with timezone information, or None if input is None """ if dt is None: return None if isinstance(dt, str): return dt # Check if datetime object has timezone information if isinstance(dt, datetime): # If datetime object has no timezone info (naive datetime), add UTC timezone if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) # Return ISO format string with timezone information return dt.isoformat() router = APIRouter( prefix="/documents", tags=["documents"], ) # Temporary file prefix temp_prefix = "__tmp__" UNKNOWN_FILE_SOURCE = "unknown_source" LEGACY_EMPTY_FILE_PATH_SENTINELS = {"", "no-file-path"} def normalize_file_path(file_path: str | None) -> str: """Normalize missing document sources to a single non-null sentinel.""" if file_path is None: return UNKNOWN_FILE_SOURCE normalized = file_path.strip() if normalized in LEGACY_EMPTY_FILE_PATH_SENTINELS: return UNKNOWN_FILE_SOURCE return normalized def sanitize_filename(filename: str, input_dir: Path) -> str: """ Sanitize uploaded filename to prevent Path Traversal attacks. 
# Returned to the client when a directory scan has been accepted. The class
# docstring below is kept verbatim because pydantic publishes it as the model
# description in the generated JSON schema.
class ScanResponse(BaseModel):
    """Response model for document scanning operation

    Attributes:
        status: Status of the scanning operation
        message: Optional message with additional details
        track_id: Tracking ID for monitoring scanning progress
    """

    # The only value this model accepts is the literal "scanning_started".
    status: Literal["scanning_started"] = Field(
        description="Status of the scanning operation"
    )
    # Free-form detail text; defaults to None when not supplied.
    message: Optional[str] = Field(
        default=None, description="Additional details about the scanning operation"
    )
    # Required: there is no default, so every response must carry a track_id.
    track_id: str = Field(description="Tracking ID for monitoring scanning progress")

    # Sample payload attached to the generated schema for API documentation.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status": "scanning_started",
                "message": "Scanning process has been initiated in the background",
                "track_id": "scan_20250729_170612_abc123",
            }
        }
    )
reprocessing failed documents operation Attributes: status: Status of the reprocessing operation message: Message describing the operation result track_id: Always empty string. Reprocessed documents retain their original track_id. """ status: Literal["reprocessing_started"] = Field( description="Status of the reprocessing operation" ) message: str = Field(description="Human-readable message describing the operation") track_id: str = Field( default="", description="Always empty string. Reprocessed documents retain their original track_id from initial upload.", ) model_config = ConfigDict( json_schema_extra={ "example": { "status": "reprocessing_started", "message": "Reprocessing of failed documents has been initiated in background", "track_id": "", } } ) class CancelPipelineResponse(BaseModel): """Response model for pipeline cancellation operation Attributes: status: Status of the cancellation request message: Message describing the operation result """ status: Literal["cancellation_requested", "not_busy"] = Field( description="Status of the cancellation request" ) message: str = Field(description="Human-readable message describing the operation") model_config = ConfigDict( json_schema_extra={ "example": { "status": "cancellation_requested", "message": "Pipeline cancellation has been requested. 
# Request body for inserting one text document. The class docstring is left
# unchanged because pydantic exposes it as the schema description.
class InsertTextRequest(BaseModel):
    """Request model for inserting a single text document

    Attributes:
        text: The text content to be inserted into the RAG system
        file_source: Source of the text (optional)
    """

    # Required, and must contain at least one character as received.
    text: str = Field(
        min_length=1,
        description="The text to insert",
    )
    # Optional label for where the text came from; min_length=0 permits "".
    file_source: Optional[str] = Field(
        default=None, min_length=0, description="File Source"
    )

    @field_validator("text", mode="after")
    @classmethod
    def strip_text_after(cls, text: str) -> str:
        """Return ``text`` with leading and trailing whitespace removed."""
        # NOTE(review): this runs in "after" mode, so min_length=1 is presumably
        # checked against the unstripped value; whitespace-only input may then
        # be reduced to an empty string here. Confirm whether that is intended.
        return text.strip()

    @field_validator("file_source", mode="before")
    @classmethod
    def normalize_source_before(cls, file_source: Optional[str]) -> str:
        """Pass the raw ``file_source`` through ``normalize_file_path``."""
        # normalize_file_path (defined earlier in this module) maps None, blank
        # and legacy "no-file-path" values to the UNKNOWN_FILE_SOURCE sentinel.
        # NOTE(review): validators are presumably not applied to the default when
        # the field is omitted, in which case file_source stays None. Confirm.
        return normalize_file_path(file_source)

    # Sample payload attached to the generated schema for API documentation.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "text": "This is a sample text to be inserted into the RAG system.",
                "file_source": "Source of the text (optional)",
            }
        }
    )
# Result envelope for the "clear all documents" operation. The class docstring
# is kept verbatim because pydantic publishes it as the schema description.
class ClearDocumentsResponse(BaseModel):
    """Response model for document clearing operation

    Attributes:
        status: Status of the clear operation
        message: Detailed message describing the operation result
    """

    # Restricted to exactly these four literals; any other value fails validation.
    status: Literal["success", "partial_success", "busy", "fail"] = Field(
        description="Status of the clear operation"
    )
    # Required human-readable explanation of the outcome.
    message: str = Field(description="Message describing the operation result")

    # Sample payload attached to the generated schema for API documentation.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status": "success",
                "message": "All documents cleared successfully. Deleted 15 files.",
            }
        }
    )
# Request body for deleting documents by ID, optionally removing the uploaded
# file and the cached LLM extraction results as well.
class DeleteDocRequest(BaseModel):
    doc_ids: List[str] = Field(..., description="The IDs of the documents to delete.")
    delete_file: bool = Field(
        default=False,
        description="Whether to delete the corresponding file in the upload directory.",
    )
    delete_llm_cache: bool = Field(
        default=False,
        description="Whether to delete cached LLM extraction results for the documents.",
    )

    @field_validator("doc_ids", mode="after")
    @classmethod
    def validate_doc_ids(cls, doc_ids: List[str]) -> List[str]:
        """Trim every ID and reject empty lists, blank IDs and duplicates.

        Returns:
            The IDs with surrounding whitespace removed, in their original order.

        Raises:
            ValueError: If the list is empty, any ID is blank, or two IDs are
                equal after trimming.
        """
        if not doc_ids:
            raise ValueError("Document IDs list cannot be empty")

        # Phase 1: trim everything; a falsy entry counts as blank.
        trimmed = [raw.strip() if raw else "" for raw in doc_ids]
        if any(item == "" for item in trimmed):
            raise ValueError("Document ID cannot be empty")

        # Phase 2: uniqueness is judged on the trimmed values.
        if len(set(trimmed)) != len(trimmed):
            raise ValueError("Document IDs must be unique")

        return trimmed
class DocStatusResponse(BaseModel):
    """Response model for a single document's processing status

    Attributes:
        id: Document identifier
        content_summary: Summary of document content
        content_length: Length of document content in characters
        status: Current processing status
        created_at: Creation timestamp (ISO format string)
        updated_at: Last update timestamp (ISO format string)
        track_id: Tracking ID for monitoring progress (optional)
        chunks_count: Number of chunks the document was split into (optional)
        error_msg: Error message if processing failed (optional)
        metadata: Additional metadata about the document (optional)
        file_path: Path to the document file
    """

    id: str = Field(description="Document identifier")
    content_summary: str = Field(description="Summary of document content")
    content_length: int = Field(description="Length of document content in characters")
    status: DocStatus = Field(description="Current processing status")
    created_at: str = Field(description="Creation timestamp (ISO format string)")
    updated_at: str = Field(description="Last update timestamp (ISO format string)")
    track_id: Optional[str] = Field(
        default=None, description="Tracking ID for monitoring progress"
    )
    chunks_count: Optional[int] = Field(
        default=None, description="Number of chunks the document was split into"
    )
    error_msg: Optional[str] = Field(
        default=None, description="Error message if processing failed"
    )
    metadata: Optional[dict[str, Any]] = Field(
        default=None, description="Additional metadata about the document"
    )
    file_path: str = Field(description="Path to the document file")

    # Sample payload attached to the generated schema for API documentation.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "id": "doc_123456",
                "content_summary": "Research paper on machine learning",
                "content_length": 15240,
                "status": "processed",
                "created_at": "2025-03-31T12:34:56",
                "updated_at": "2025-03-31T12:35:30",
                "track_id": "upload_20250729_170612_abc123",
                "chunks_count": 12,
                # The example key must match the declared field name
                # (`error_msg`); the previous "error" key advertised a field
                # this model does not have.
                "error_msg": None,
                "metadata": {"author": "John Doe", "year": 2025},
                "file_path": "research_paper.pdf",
            }
        }
    )
# Query parameters for the paginated document listing. The class docstring is
# kept verbatim because pydantic publishes it as the schema description.
class DocumentsRequest(BaseModel):
    """Request model for paginated document queries

    Attributes:
        status_filter: Filter by document status, None for all statuses
        page: Page number (1-based)
        page_size: Number of documents per page (10-200)
        sort_field: Field to sort by ('created_at', 'updated_at', 'id', 'file_path')
        sort_direction: Sort direction ('asc' or 'desc')
    """

    # None (the default) means "do not filter by status".
    status_filter: Optional[DocStatus] = Field(
        default=None, description="Filter by document status, None for all statuses"
    )
    # ge=1 rejects zero and negative page numbers.
    page: int = Field(default=1, ge=1, description="Page number (1-based)")
    # Bounded on both sides: values below 10 or above 200 fail validation.
    page_size: int = Field(
        default=50, ge=10, le=200, description="Number of documents per page (10-200)"
    )
    # Sorting is limited to these four field names.
    sort_field: Literal["created_at", "updated_at", "id", "file_path"] = Field(
        default="updated_at", description="Field to sort by"
    )
    sort_direction: Literal["asc", "desc"] = Field(
        default="desc", description="Sort direction"
    )

    # Sample payload attached to the generated schema for API documentation.
    # NOTE(review): the example spells the status "PROCESSED" while the
    # DocStatusResponse example in this module uses "processed"; confirm which
    # spelling DocStatus actually accepts.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status_filter": "PROCESSED",
                "page": 1,
                "page_size": 50,
                "sort_field": "updated_at",
                "sort_direction": "desc",
            }
        }
    )
# Envelope returned by the paginated listing: one page of documents, the paging
# metadata, and per-status totals. The class docstring is kept verbatim because
# pydantic publishes it as the schema description.
class PaginatedDocsResponse(BaseModel):
    """Response model for paginated document queries

    Attributes:
        documents: List of documents for the current page
        pagination: Pagination information
        status_counts: Count of documents by status for all documents
    """

    # Only the documents belonging to the requested page.
    documents: List[DocStatusResponse] = Field(
        description="List of documents for the current page"
    )
    pagination: PaginationInfo = Field(description="Pagination information")
    # Keys are plain strings rather than DocStatus members.
    status_counts: Dict[str, int] = Field(
        description="Count of documents by status for all documents"
    )

    # Sample payload attached to the generated schema for API documentation.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "documents": [
                    {
                        "id": "doc_123456",
                        "content_summary": "Research paper on machine learning",
                        "content_length": 15240,
                        "status": "PROCESSED",
                        "created_at": "2025-03-31T12:34:56",
                        "updated_at": "2025-03-31T12:35:30",
                        "track_id": "upload_20250729_170612_abc123",
                        "chunks_count": 12,
                        "error_msg": None,
                        "metadata": {"author": "John Doe", "year": 2025},
                        "file_path": "research_paper.pdf",
                    }
                ],
                "pagination": {
                    "page": 1,
                    "page_size": 50,
                    "total_count": 150,
                    "total_pages": 3,
                    "has_next": True,
                    "has_prev": False,
                },
                "status_counts": {
                    "PENDING": 10,
                    "PROCESSING": 5,
                    "PREPROCESSED": 5,
                    "PROCESSED": 130,
                    "FAILED": 5,
                },
            }
        }
    )
class DocumentManager:
    """Track the input directory for one workspace and the files already indexed.

    Attributes:
        base_input_dir: Root input directory as given by the caller.
        workspace: Workspace name; empty string means "no workspace".
        supported_extensions: File suffixes (including the dot) that may be indexed.
        indexed_files: Paths that have been marked as indexed.
        input_dir: ``base_input_dir`` itself, or its ``workspace`` subdirectory
            when a workspace is set. Created on construction if missing.
    """

    def __init__(
        self,
        input_dir: str,
        workspace: str = "",  # New parameter for workspace isolation
        supported_extensions: tuple = (
            ".txt",
            ".md",
            ".mdx",  # MDX (Markdown + JSX)
            ".pdf",
            ".docx",
            ".pptx",
            ".xlsx",
            ".rtf",  # Rich Text Format
            ".odt",  # OpenDocument Text
            ".tex",  # LaTeX
            ".epub",  # Electronic Publication
            ".html",  # HyperText Markup Language
            ".htm",  # HyperText Markup Language
            ".csv",  # Comma-Separated Values
            ".json",  # JavaScript Object Notation
            ".xml",  # eXtensible Markup Language
            ".yaml",  # YAML Ain't Markup Language
            ".yml",  # YAML
            ".log",  # Log files
            ".conf",  # Configuration files
            ".ini",  # Initialization files
            ".properties",  # Java properties files
            ".sql",  # SQL scripts
            ".bat",  # Batch files
            ".sh",  # Shell scripts
            ".c",  # C source code
            ".h",  # C header
            ".cpp",  # C++ source code
            ".hpp",  # C++ header
            ".py",  # Python source code
            ".java",  # Java source code
            ".js",  # JavaScript source code
            ".ts",  # TypeScript source code
            ".swift",  # Swift source code
            ".go",  # Go source code
            ".rb",  # Ruby source code
            ".php",  # PHP source code
            ".css",  # Cascading Style Sheets
            ".scss",  # Sassy CSS
            ".less",  # LESS CSS
        ),
    ):
        """Record the configuration and make sure the input directory exists.

        Args:
            input_dir: Base directory that holds uploaded/scanned files.
            workspace: Optional subdirectory name used to isolate data.
            supported_extensions: Suffixes accepted for indexing.
        """
        # Store the base input directory and workspace
        self.base_input_dir = Path(input_dir)
        self.workspace = workspace
        self.supported_extensions = supported_extensions
        self.indexed_files = set()

        # A non-empty workspace gets its own subdirectory for data isolation.
        if workspace:
            self.input_dir = self.base_input_dir / workspace
        else:
            self.input_dir = self.base_input_dir

        # Create input directory if it doesn't exist
        self.input_dir.mkdir(parents=True, exist_ok=True)

    def _normalized_extensions(self) -> tuple:
        """Return the supported suffixes lower-cased, as a tuple for ``str.endswith``."""
        # Recomputed on every call so later changes to supported_extensions apply.
        return tuple(ext.lower() for ext in self.supported_extensions)

    def scan_directory_for_new_files(self) -> List[Path]:
        """Return supported regular files in ``input_dir`` not yet marked as indexed.

        The directory is listed once. Matching uses the same case-insensitive
        suffix test as ``is_supported_file``, so a file accepted on upload
        (e.g. ``REPORT.PDF``) is also found by a scan. Directories are skipped
        even if their names end in a supported suffix. Results are sorted by
        path so the order is deterministic.

        Returns:
            Paths of new files; an empty list if the directory no longer exists.
        """
        logger.debug(f"Scanning for supported files in {self.input_dir}")

        # The directory may have been removed after construction.
        if not self.input_dir.is_dir():
            return []

        extensions = self._normalized_extensions()
        return [
            entry
            for entry in sorted(self.input_dir.iterdir())
            if entry.is_file()
            and entry.name.lower().endswith(extensions)
            and entry not in self.indexed_files
        ]

    def mark_as_indexed(self, file_path: Path):
        """Remember ``file_path`` so later scans no longer report it."""
        self.indexed_files.add(file_path)

    def is_supported_file(self, filename: str) -> bool:
        """Return True when ``filename`` ends with a supported suffix (case-insensitive)."""
        return filename.lower().endswith(self._normalized_extensions())
def get_unique_filename_in_enqueued(target_dir: Path, original_name: str) -> str:
    """Pick a filename that does not yet exist inside ``target_dir``.

    Args:
        target_dir: Directory in which the name must be free.
        original_name: Preferred filename.

    Returns:
        str: ``original_name`` if it is free; otherwise the first free
        ``<stem>_NNN<suffix>`` with NNN running from 001 to 999; otherwise
        ``<stem>_<unix timestamp><suffix>``.
    """

    def is_free(name: str) -> bool:
        return not (target_dir / name).exists()

    # The unmodified name wins whenever it is available.
    if is_free(original_name):
        return original_name

    parsed = Path(original_name)
    stem, ext = parsed.stem, parsed.suffix

    # Walk the zero-padded counters lazily and stop at the first free one.
    numbered = (f"{stem}_{counter:03d}{ext}" for counter in range(1, 1000))
    first_free = next((name for name in numbered if is_free(name)), None)
    if first_free is not None:
        return first_free

    # All 999 numbered slots are occupied: fall back to a timestamp suffix.
    import time

    return f"{stem}_{int(time.time())}{ext}"
""" from docx import Document # type: ignore from docx.table import Table # type: ignore from docx.text.paragraph import Paragraph # type: ignore docx_file = BytesIO(file_bytes) doc = Document(docx_file) def escape_cell(cell_value: str | None) -> str: """Escape characters that would break tab-delimited layout. Escape order is critical: backslashes first, then tabs/newlines. This prevents double-escaping issues. Args: cell_value: The cell value to escape (can be None or str) Returns: str: Escaped cell value safe for tab-delimited format """ if cell_value is None: return "" text = str(cell_value) # CRITICAL: Escape backslash first to avoid double-escaping return ( text.replace("\\", "\\\\") # Must be first: \ -> \\ .replace("\t", "  ") # Tab -> \t (visible) .replace("\r\n", "
") # Windows newline -> \n .replace("\r", "
") # Mac newline -> \n .replace("\n", "
") # Unix newline -> \n ) content_parts = [] in_table = False # Track if we're currently processing a table # Iterate through all body elements in document order for element in doc.element.body: # Check if element is a paragraph if element.tag.endswith("p"): # If coming out of a table, add blank line after table if in_table: content_parts.append("") # Blank line after table in_table = False paragraph = Paragraph(element, doc) text = paragraph.text # Always append to preserve document spacing (including blank paragraphs) content_parts.append(text) # Check if element is a table elif element.tag.endswith("tbl"): # Add blank line before table (if content exists) if content_parts and not in_table: content_parts.append("") # Blank line before table in_table = True table = Table(element, doc) for row in table.rows: row_text = [] for cell in row.cells: cell_text = cell.text # Escape special characters to preserve tab-delimited structure row_text.append(escape_cell(cell_text)) # Only add row if at least one cell has content if any(cell for cell in row_text): content_parts.append("\t".join(row_text)) return "\n".join(content_parts) def _extract_pptx(file_bytes: bytes) -> str: """Extract PPTX content (synchronous). Args: file_bytes: PPTX file content as bytes Returns: str: Extracted text content """ from pptx import Presentation # type: ignore pptx_file = BytesIO(file_bytes) prs = Presentation(pptx_file) content = "" for slide in prs.slides: for shape in slide.shapes: if hasattr(shape, "text"): content += shape.text + "\n" return content def _extract_xlsx(file_bytes: bytes) -> str: """Extract XLSX content in tab-delimited format with clear sheet separation. This function processes Excel workbooks and converts them to a structured text format suitable for LLM prompts and RAG systems. Each sheet is clearly delimited with separator lines, and special characters are escaped to preserve the tab-delimited structure. 
Features: - Each sheet is wrapped with '====================' separators for visual distinction - Special characters (tabs, newlines, backslashes) are escaped to prevent structure corruption - Column alignment is preserved across all rows to maintain tabular structure - Empty rows are preserved as blank lines to maintain row structure - Uses sheet.max_column to determine column width efficiently Args: file_bytes: XLSX file content as bytes Returns: str: Extracted text content with all sheets in tab-delimited format. Format: Sheet separators, sheet name, then tab-delimited rows. Example output: ==================== Sheet: Data ==================== Name\tAge\tCity Alice\t30\tNew York Bob\t25\tLondon ==================== Sheet: Summary ==================== Total\t2 ==================== """ from openpyxl import load_workbook # type: ignore xlsx_file = BytesIO(file_bytes) wb = load_workbook(xlsx_file) def escape_cell(cell_value: str | int | float | None) -> str: """Escape characters that would break tab-delimited layout. Escape order is critical: backslashes first, then tabs/newlines. This prevents double-escaping issues. Args: cell_value: The cell value to escape (can be None, str, int, or float) Returns: str: Escaped cell value safe for tab-delimited format """ if cell_value is None: return "" text = str(cell_value) # CRITICAL: Escape backslash first to avoid double-escaping return ( text.replace("\\", "\\\\") # Must be first: \ -> \\ .replace("\t", "\\t") # Tab -> \t (visible) .replace("\r\n", "\\n") # Windows newline -> \n .replace("\r", "\\n") # Mac newline -> \n .replace("\n", "\\n") # Unix newline -> \n ) def escape_sheet_title(title: str) -> str: """Escape sheet title to prevent formatting issues in separators. 
# Extensions that are ingested as plain UTF-8 text, without a format-specific parser.
_PLAIN_TEXT_EXTENSIONS = frozenset(
    {
        ".txt",
        ".md",
        ".mdx",
        ".html",
        ".htm",
        ".tex",
        ".json",
        ".xml",
        ".yaml",
        ".yml",
        ".rtf",
        ".odt",
        ".epub",
        ".csv",
        ".log",
        ".conf",
        ".ini",
        ".properties",
        ".sql",
        ".bat",
        ".sh",
        ".c",
        ".h",
        ".cpp",
        ".hpp",
        ".py",
        ".java",
        ".js",
        ".ts",
        ".swift",
        ".go",
        ".rb",
        ".php",
        ".css",
        ".scss",
        ".less",
    }
)


async def _report_enqueue_failure(
    rag: LightRAG,
    file_path: Path,
    track_id: str,
    file_size: int,
    error_description: str,
    original_error: str,
    log_message: str,
    log=None,
) -> tuple[bool, str]:
    """Record one failed file, log it, and build the failure result.

    Args:
        rag: LightRAG instance whose error-document queue receives the record
        file_path: Path of the file that failed (only its name is recorded)
        track_id: Tracking ID the error record is filed under
        file_size: Size in bytes stored in the record (0 when unknown)
        error_description: Short description stored in the record
        original_error: Underlying error text stored in the record
        log_message: Message written to the log after the record is stored
        log: Logging callable to use; defaults to ``logger.error``

    Returns:
        tuple: Always ``(False, track_id)`` so callers can ``return`` it directly
    """
    error_files = [
        {
            "file_path": str(file_path.name),
            "error_description": error_description,
            "original_error": original_error,
            "file_size": file_size,
        }
    ]
    await rag.apipeline_enqueue_error_documents(error_files, track_id)
    (log or logger.error)(log_message)
    return False, track_id


async def _extract_with_configured_engine(
    file_path: Path, fallback_name: str, fallback_extractor
) -> str:
    """Extract text with DOCLING when configured and available, else with the fallback.

    Args:
        file_path: Path handed to the DOCLING converter
        fallback_name: Library name mentioned in the fallback warning
        fallback_extractor: Zero-argument callable run in a worker thread

    Returns:
        str: Whatever the selected extractor returns
    """
    if global_args.document_loading_engine == "DOCLING":
        if _is_docling_available():
            return await asyncio.to_thread(_convert_with_docling, file_path)
        logger.warning(
            f"DOCLING engine configured but not available for {file_path.name}. Falling back to {fallback_name}."
        )
    # Run the fallback parser off the event loop (non-blocking via to_thread)
    return await asyncio.to_thread(fallback_extractor)


async def pipeline_enqueue_file(
    rag: LightRAG, file_path: Path, track_id: Optional[str] = None
) -> tuple[bool, str]:
    """Add a file to the queue for processing

    Reads the file, extracts its text according to the extension, enqueues the
    text on ``rag`` and moves the file into the ``__enqueued__`` sub-directory.
    Every failure is recorded through ``rag.apipeline_enqueue_error_documents``
    instead of being raised from the handled paths.

    Args:
        rag: LightRAG instance
        file_path: Path to the saved file
        track_id: Optional tracking ID, if not provided will be generated

    Returns:
        tuple: (success: bool, track_id: str)
    """
    # Generate track_id if not provided
    if track_id is None:
        track_id = generate_track_id("unknown")

    try:
        content = ""
        ext = file_path.suffix.lower()

        # File size is only used for error reporting, so a failed stat is not fatal
        try:
            file_size = file_path.stat().st_size
        except Exception:
            file_size = 0

        # Read the raw bytes once; every extractor works from this buffer
        try:
            async with aiofiles.open(file_path, "rb") as f:
                file = await f.read()
        except PermissionError as e:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "[File Extraction]Permission denied - cannot read file",
                str(e),
                f"[File Extraction]Permission denied reading file: {file_path.name}",
            )
        except FileNotFoundError as e:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "[File Extraction]File not found",
                str(e),
                f"[File Extraction]File not found: {file_path.name}",
            )
        except Exception as e:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "[File Extraction]File reading error",
                str(e),
                f"[File Extraction]Error reading file {file_path.name}: {str(e)}",
            )

        # Fallback parsers for the formats DOCLING can also handle.
        # The lambdas defer all work until run inside the worker thread.
        fallback_extractors = {
            ".pdf": (
                "pypdf",
                lambda: _extract_pdf_pypdf(file, global_args.pdf_decrypt_password),
            ),
            ".docx": ("python-docx", lambda: _extract_docx(file)),
            ".pptx": ("python-pptx", lambda: _extract_pptx(file)),
            ".xlsx": ("openpyxl", lambda: _extract_xlsx(file)),
        }

        # Process based on file type
        try:
            if ext in _PLAIN_TEXT_EXTENSIONS:
                try:
                    # Try to decode as UTF-8
                    content = file.decode("utf-8")

                    # Validate content
                    if not content.strip():
                        return await _report_enqueue_failure(
                            rag,
                            file_path,
                            track_id,
                            file_size,
                            "[File Extraction]Empty file content",
                            "File contains no content or only whitespace",
                            f"[File Extraction]Empty content in file: {file_path.name}",
                        )

                    # Check if content looks like binary data string representation
                    if content.startswith(("b'", 'b"')):
                        return await _report_enqueue_failure(
                            rag,
                            file_path,
                            track_id,
                            file_size,
                            "[File Extraction]Binary data in text file",
                            "File appears to contain binary data representation instead of text",
                            f"[File Extraction]File {file_path.name} appears to contain binary data representation instead of text",
                        )
                except UnicodeDecodeError as e:
                    return await _report_enqueue_failure(
                        rag,
                        file_path,
                        track_id,
                        file_size,
                        "[File Extraction]UTF-8 encoding error, please convert it to UTF-8 before processing",
                        f"File is not valid UTF-8 encoded text: {str(e)}",
                        f"[File Extraction]File {file_path.name} is not valid UTF-8 encoded text. Please convert it to UTF-8 before processing.",
                    )
            elif ext in fallback_extractors:
                fallback_name, fallback_extractor = fallback_extractors[ext]
                kind = ext[1:].upper()  # ".pdf" -> "PDF", used in the messages
                try:
                    content = await _extract_with_configured_engine(
                        file_path, fallback_name, fallback_extractor
                    )
                except Exception as e:
                    return await _report_enqueue_failure(
                        rag,
                        file_path,
                        track_id,
                        file_size,
                        f"[File Extraction]{kind} processing error",
                        f"Failed to extract text from {kind}: {str(e)}",
                        f"[File Extraction]Error processing {kind} {file_path.name}: {str(e)}",
                    )
            else:
                return await _report_enqueue_failure(
                    rag,
                    file_path,
                    track_id,
                    file_size,
                    f"[File Extraction]Unsupported file type: {ext}",
                    f"File extension {ext} is not supported",
                    f"[File Extraction]Unsupported file type: {file_path.name} (extension {ext})",
                )
        except Exception as e:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "[File Extraction]File format processing error",
                f"Unexpected error during file extracting: {str(e)}",
                f"[File Extraction]Unexpected error during {file_path.name} extracting: {str(e)}",
            )

        # Insert into the RAG queue
        if not content:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "No content extracted",
                "No content could be extracted from file",
                f"No content extracted from file: {file_path.name}",
            )

        # Check if content contains only whitespace characters
        if not content.strip():
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "[File Extraction]File contains only whitespace",
                "File content contains only whitespace characters",
                f"[File Extraction]File contains only whitespace characters: {file_path.name}",
                log=logger.warning,
            )

        try:
            await rag.apipeline_enqueue_documents(
                content, file_paths=file_path.name, track_id=track_id
            )
            logger.info(f"Successfully extracted and enqueued file: {file_path.name}")

            # Move file to __enqueued__ directory after enqueuing
            try:
                enqueued_dir = file_path.parent / "__enqueued__"
                enqueued_dir.mkdir(exist_ok=True)

                # Generate unique filename to avoid conflicts
                unique_filename = get_unique_filename_in_enqueued(
                    enqueued_dir, file_path.name
                )
                target_path = enqueued_dir / unique_filename

                # Move the file
                file_path.rename(target_path)
                logger.debug(
                    f"Moved file to enqueued directory: {file_path.name} -> {unique_filename}"
                )
            except Exception as move_error:
                # Don't affect the main function's success status
                logger.error(
                    f"Failed to move file {file_path.name} to __enqueued__ directory: {move_error}"
                )

            return True, track_id
        except Exception as e:
            return await _report_enqueue_failure(
                rag,
                file_path,
                track_id,
                file_size,
                "Document enqueue error",
                f"Failed to enqueue document: {str(e)}",
                f"Error enqueueing document {file_path.name}: {str(e)}",
            )

    except Exception as e:
        # Catch-all for any unexpected errors
        try:
            file_size = file_path.stat().st_size if file_path.exists() else 0
        except Exception:
            file_size = 0

        failure = await _report_enqueue_failure(
            rag,
            file_path,
            track_id,
            file_size,
            "Unexpected processing error",
            f"Unexpected error: {str(e)}",
            f"Enqueuing file {file_path.name} error: {str(e)}",
        )
        logger.error(traceback.format_exc())
        return failure
    finally:
        if file_path.name.startswith(temp_prefix):
            try:
                # The file may already have been moved to __enqueued__ above;
                # a missing file is then expected and must not be logged as an error.
                file_path.unlink(missing_ok=True)
            except Exception as e:
                logger.error(f"Error deleting file {file_path}: {str(e)}")
async def pipeline_index_file(rag: LightRAG, file_path: Path, track_id: str = None):
    """Enqueue a single file and, if that succeeded, run the processing queue.

    Errors are logged (message plus traceback) and never propagated.

    Args:
        rag: LightRAG instance
        file_path: Path to the saved file
        track_id: Optional tracking ID
    """
    try:
        outcome = await pipeline_enqueue_file(rag, file_path, track_id)
        was_enqueued = outcome[0]
        if not was_enqueued:
            return
        await rag.apipeline_process_enqueue_documents()
    except Exception as e:
        logger.error(f"Error indexing file {file_path.name}: {str(e)}")
        logger.error(traceback.format_exc())


async def pipeline_index_files(
    rag: LightRAG, file_paths: List[Path], track_id: str = None
):
    """Enqueue several files one after another, then run the processing queue once.

    Files are handled sequentially to avoid high CPU load. The queue is only
    processed when at least one file was enqueued successfully. Errors are
    logged and never propagated.

    Args:
        rag: LightRAG instance
        file_paths: Paths to the files to index
        track_id: Optional tracking ID to pass to all files
    """
    if not file_paths:
        return

    try:
        # Order by pinyin sort key so Chinese file names sort naturally
        ordered_paths = list(file_paths)
        ordered_paths.sort(key=lambda p: get_pinyin_sort_key(str(p)))

        outcomes = []
        for current_path in ordered_paths:
            ok, _ = await pipeline_enqueue_file(rag, current_path, track_id)
            outcomes.append(ok)

        # Process the queue only if at least one file was successfully enqueued
        if any(outcomes):
            await rag.apipeline_process_enqueue_documents()
    except Exception as e:
        logger.error(f"Error indexing files: {str(e)}")
        logger.error(traceback.format_exc())
async def pipeline_index_texts(
    rag: LightRAG,
    texts: List[str],
    file_sources: List[str] = None,
    track_id: str = None,
):
    """Enqueue a list of texts under one track_id and run the processing queue.

    When ``file_sources`` is given, each entry is normalized and the list is
    padded with ``UNKNOWN_FILE_SOURCE`` up to the number of texts.

    Args:
        rag: LightRAG instance
        texts: The texts to index
        file_sources: Sources of the texts
        track_id: Optional tracking ID

    Raises:
        ValueError: If more file sources than texts are supplied
    """
    if not texts:
        return

    sources: list[str] | None = None
    if file_sources:
        sources = list(map(normalize_file_path, file_sources))
        shortfall = len(texts) - len(sources)
        if shortfall < 0:
            raise ValueError("Number of file sources must not exceed number of texts")
        # Pad so every text has a source entry (no-op when shortfall is 0)
        sources += [UNKNOWN_FILE_SOURCE] * shortfall

    await rag.apipeline_enqueue_documents(
        input=texts, file_paths=sources, track_id=track_id
    )
    await rag.apipeline_process_enqueue_documents()


async def run_scanning_process(
    rag: LightRAG, doc_manager: DocumentManager, track_id: str = None
):
    """Background task that scans for new files and indexes the unprocessed ones.

    Files whose stored status is "processed" are skipped with a warning. When
    the scan finds nothing, the already-queued documents are processed instead.
    Errors are logged and never propagated.

    Args:
        rag: LightRAG instance
        doc_manager: DocumentManager instance
        track_id: Optional tracking ID to pass to all scanned files
    """
    try:
        discovered = doc_manager.scan_directory_for_new_files()
        logger.info(f"Found {len(discovered)} files to index.")

        if not discovered:
            # No new files to index, check if there are any documents in the queue
            logger.info(
                "No upload file found, check if there are any documents in the queue..."
            )
            await rag.apipeline_process_enqueue_documents()
            return

        # Split the scan result into files to index and files already processed
        to_index = []
        already_processed = []
        for candidate in discovered:
            name = candidate.name
            record = await rag.doc_status.get_doc_by_file_path(name)
            if not record or record.get("status") != "processed":
                # New file, or known file in a non-processed state
                to_index.append(candidate)
                continue
            already_processed.append(name)
            logger.warning(f"Skipping already processed file: {name}")

        if not to_index:
            logger.info("No files to process after filtering already processed files.")
            return

        await pipeline_index_files(rag, to_index, track_id)
        if already_processed:
            logger.info(
                f"Scanning process completed: {len(to_index)} files Processed {len(already_processed)} skipped."
            )
        else:
            logger.info(
                f"Scanning process completed: {len(to_index)} files Processed."
            )

    except Exception as e:
        logger.error(f"Error during scanning process: {str(e)}")
        logger.error(traceback.format_exc())
async def background_delete_documents(
    rag: LightRAG,
    doc_manager: DocumentManager,
    doc_ids: List[str],
    delete_file: bool = False,
    delete_llm_cache: bool = False,
) -> None:
    """Background task to delete multiple documents

    Marks the shared pipeline status as busy, deletes the documents one by one
    through ``rag.adelete_by_doc_id`` and reports progress in
    ``latest_message`` / ``history_messages``. Afterwards the busy and
    cancellation flags are cleared and, if ``request_pending`` is set, the
    enqueue-processing pipeline is started.

    Args:
        rag: LightRAG instance
        doc_manager: DocumentManager whose ``input_dir`` holds the source files
        doc_ids: IDs of the documents to delete
        delete_file: Also delete the source file from ``input_dir`` and any
            matching file in its ``__enqueued__`` sub-directory
        delete_llm_cache: Forwarded to ``rag.adelete_by_doc_id``

    Returns:
        None. Returns early, without changing the status, when the pipeline
        is already busy.
    """
    from lightrag.kg.shared_storage import (
        get_namespace_data,
        get_namespace_lock,
    )

    pipeline_status = await get_namespace_data(
        "pipeline_status", workspace=rag.workspace
    )
    pipeline_status_lock = get_namespace_lock(
        "pipeline_status", workspace=rag.workspace
    )

    total_docs = len(doc_ids)
    successful_deletions = []
    failed_deletions = []

    # Double-check pipeline status before proceeding
    async with pipeline_status_lock:
        if pipeline_status.get("busy", False):
            logger.warning("Error: Unexpected pipeline busy state, aborting deletion.")
            return  # Abort deletion operation

        # Set pipeline status to busy for deletion
        pipeline_status.update(
            {
                "busy": True,
                # Job name can not be changed, it's verified in adelete_by_doc_id()
                "job_name": f"Deleting {total_docs} Documents",
                "job_start": datetime.now().isoformat(),
                "docs": total_docs,
                "batchs": total_docs,
                "cur_batch": 0,
                "latest_message": "Starting document deletion process",
            }
        )
        # Use slice assignment to clear the list in place
        pipeline_status["history_messages"][:] = ["Starting document deletion process"]
        if delete_llm_cache:
            pipeline_status["history_messages"].append(
                "LLM cache cleanup requested for this deletion job"
            )

    try:
        # Loop through each document ID and delete them one by one
        for i, doc_id in enumerate(doc_ids, 1):
            # Check for cancellation at the start of each document deletion
            async with pipeline_status_lock:
                if pipeline_status.get("cancellation_requested", False):
                    cancel_msg = f"Deletion cancelled by user at document {i}/{total_docs}. {len(successful_deletions)} deleted, {total_docs - i + 1} remaining."
                    logger.info(cancel_msg)
                    pipeline_status["latest_message"] = cancel_msg
                    pipeline_status["history_messages"].append(cancel_msg)
                    # Add remaining documents to failed list with cancellation reason
                    failed_deletions.extend(
                        doc_ids[i - 1 :]
                    )  # i-1 because enumerate starts at 1
                    break  # Exit the loop, remaining documents unchanged

                start_msg = f"Deleting document {i}/{total_docs}: {doc_id}"
                logger.info(start_msg)
                pipeline_status["cur_batch"] = i
                pipeline_status["latest_message"] = start_msg
                pipeline_status["history_messages"].append(start_msg)

            # Placeholder shown in error messages if deletion raises before a result exists
            file_path = "#"
            try:
                result = await rag.adelete_by_doc_id(
                    doc_id, delete_llm_cache=delete_llm_cache
                )
                # NOTE(review): `result` is always bound at this point, so the
                # `"result" in locals()` guard looks redundant - confirm before simplifying.
                file_path = (
                    getattr(result, "file_path", "-") if "result" in locals() else "-"
                )
                if result.status == "success":
                    successful_deletions.append(doc_id)
                    success_msg = (
                        f"Document deleted {i}/{total_docs}: {doc_id}[{file_path}]"
                    )
                    logger.info(success_msg)
                    async with pipeline_status_lock:
                        pipeline_status["history_messages"].append(success_msg)

                    # Handle file deletion if requested and file_path is available
                    if (
                        delete_file
                        and result.file_path
                        and result.file_path != "unknown_source"
                    ):
                        try:
                            # Names of files actually removed; empty means nothing was deleted
                            deleted_files = []
                            # SECURITY FIX: Use secure path validation to prevent arbitrary file deletion
                            safe_file_path = validate_file_path_security(
                                result.file_path, doc_manager.input_dir
                            )

                            if safe_file_path is None:
                                # Security violation detected - log and skip file deletion
                                security_msg = f"Security violation: Unsafe file path detected for deletion - {result.file_path}"
                                logger.warning(security_msg)
                                async with pipeline_status_lock:
                                    pipeline_status["latest_message"] = security_msg
                                    pipeline_status["history_messages"].append(
                                        security_msg
                                    )
                            else:
                                # check and delete files from input_dir directory
                                if safe_file_path.exists():
                                    try:
                                        safe_file_path.unlink()
                                        deleted_files.append(safe_file_path.name)
                                        file_delete_msg = f"Successfully deleted input_dir file: {result.file_path}"
                                        logger.info(file_delete_msg)
                                        async with pipeline_status_lock:
                                            pipeline_status["latest_message"] = (
                                                file_delete_msg
                                            )
                                            pipeline_status["history_messages"].append(
                                                file_delete_msg
                                            )
                                    except Exception as file_error:
                                        file_error_msg = f"Failed to delete input_dir file {result.file_path}: {str(file_error)}"
                                        logger.debug(file_error_msg)
                                        async with pipeline_status_lock:
                                            pipeline_status["latest_message"] = (
                                                file_error_msg
                                            )
                                            pipeline_status["history_messages"].append(
                                                file_error_msg
                                            )

                                # Also check and delete files from __enqueued__ directory
                                enqueued_dir = doc_manager.input_dir / "__enqueued__"
                                if enqueued_dir.exists():
                                    # SECURITY FIX: Validate that the file path is safe before processing
                                    # Only proceed if the original path validation passed
                                    base_name = Path(result.file_path).stem
                                    extension = Path(result.file_path).suffix

                                    # Search for exact match and files with numeric suffixes
                                    for enqueued_file in enqueued_dir.glob(
                                        f"{base_name}*{extension}"
                                    ):
                                        # Additional security check: ensure enqueued file is within enqueued directory
                                        safe_enqueued_path = (
                                            validate_file_path_security(
                                                enqueued_file.name, enqueued_dir
                                            )
                                        )
                                        if safe_enqueued_path is not None:
                                            try:
                                                enqueued_file.unlink()
                                                deleted_files.append(enqueued_file.name)
                                                logger.info(
                                                    f"Successfully deleted enqueued file: {enqueued_file.name}"
                                                )
                                            except Exception as enqueued_error:
                                                file_error_msg = f"Failed to delete enqueued file {enqueued_file.name}: {str(enqueued_error)}"
                                                logger.debug(file_error_msg)
                                                async with pipeline_status_lock:
                                                    pipeline_status[
                                                        "latest_message"
                                                    ] = file_error_msg
                                                    pipeline_status[
                                                        "history_messages"
                                                    ].append(file_error_msg)
                                        else:
                                            security_msg = f"Security violation: Unsafe enqueued file path detected - {enqueued_file.name}"
                                            logger.warning(security_msg)

                            # Nothing was removed from either location: report it as skipped
                            if deleted_files == []:
                                file_error_msg = f"File deletion skipped, missing or unsafe file: {result.file_path}"
                                logger.warning(file_error_msg)
                                async with pipeline_status_lock:
                                    pipeline_status["latest_message"] = file_error_msg
                                    pipeline_status["history_messages"].append(
                                        file_error_msg
                                    )

                        except Exception as file_error:
                            file_error_msg = f"Failed to delete file {result.file_path}: {str(file_error)}"
                            logger.error(file_error_msg)
                            async with pipeline_status_lock:
                                pipeline_status["latest_message"] = file_error_msg
                                pipeline_status["history_messages"].append(
                                    file_error_msg
                                )
                    elif delete_file:
                        # File deletion was requested but the result has no usable file path
                        no_file_msg = (
                            f"File deletion skipped, missing file path: {doc_id}"
                        )
                        logger.warning(no_file_msg)
                        async with pipeline_status_lock:
                            pipeline_status["latest_message"] = no_file_msg
                            pipeline_status["history_messages"].append(no_file_msg)
                else:
                    # adelete_by_doc_id reported a non-success status
                    failed_deletions.append(doc_id)
                    error_msg = f"Failed to delete {i}/{total_docs}: {doc_id}[{file_path}] - {result.message}"
                    logger.error(error_msg)
                    async with pipeline_status_lock:
                        pipeline_status["latest_message"] = error_msg
                        pipeline_status["history_messages"].append(error_msg)

            except Exception as e:
                # One failing document must not stop the rest of the batch
                failed_deletions.append(doc_id)
                error_msg = f"Error deleting document {i}/{total_docs}: {doc_id}[{file_path}] - {str(e)}"
                logger.error(error_msg)
                logger.error(traceback.format_exc())
                async with pipeline_status_lock:
                    pipeline_status["latest_message"] = error_msg
                    pipeline_status["history_messages"].append(error_msg)

    except Exception as e:
        error_msg = f"Critical error during batch deletion: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        async with pipeline_status_lock:
            pipeline_status["history_messages"].append(error_msg)
    finally:
        # Final summary and check for pending requests
        async with pipeline_status_lock:
            pipeline_status["busy"] = False
            # NOTE(review): this writes the key "pending_requests", while the flag read
            # a few lines below is "request_pending" - confirm which key is intended.
            pipeline_status["pending_requests"] = False  # Reset pending requests flag
            pipeline_status["cancellation_requested"] = (
                False  # Always reset cancellation flag
            )
            completion_msg = f"Deletion completed: {len(successful_deletions)} successful, {len(failed_deletions)} failed"
            pipeline_status["latest_message"] = completion_msg
            pipeline_status["history_messages"].append(completion_msg)

            # Check if there are pending document indexing requests
            has_pending_request = pipeline_status.get("request_pending", False)

        # If there are pending requests, start document processing pipeline
        if has_pending_request:
            try:
                logger.info(
                    "Processing pending document indexing requests after deletion"
                )
                await rag.apipeline_process_enqueue_documents()
            except Exception as e:
                logger.error(f"Error processing pending documents after deletion: {e}")
pipeline_status["history_messages"].append(error_msg) except Exception as e: error_msg = f"Critical error during batch deletion: {str(e)}" logger.error(error_msg) logger.error(traceback.format_exc()) async with pipeline_status_lock: pipeline_status["history_messages"].append(error_msg) finally: # Final summary and check for pending requests async with pipeline_status_lock: pipeline_status["busy"] = False pipeline_status["pending_requests"] = False # Reset pending requests flag pipeline_status["cancellation_requested"] = ( False # Always reset cancellation flag ) completion_msg = f"Deletion completed: {len(successful_deletions)} successful, {len(failed_deletions)} failed" pipeline_status["latest_message"] = completion_msg pipeline_status["history_messages"].append(completion_msg) # Check if there are pending document indexing requests has_pending_request = pipeline_status.get("request_pending", False) # If there are pending requests, start document processing pipeline if has_pending_request: try: logger.info( "Processing pending document indexing requests after deletion" ) await rag.apipeline_process_enqueue_documents() except Exception as e: logger.error(f"Error processing pending documents after deletion: {e}") def create_document_routes( rag: LightRAG, doc_manager: DocumentManager, api_key: Optional[str] = None ): # Create combined auth dependency for document routes combined_auth = get_combined_auth_dependency(api_key) @router.post( "/scan", response_model=ScanResponse, dependencies=[Depends(combined_auth)] ) async def scan_for_new_documents(background_tasks: BackgroundTasks): """ Trigger the scanning process for new documents. This endpoint initiates a background task that scans the input directory for new documents and processes them. If a scanning process is already running, it returns a status indicating that fact. 
Returns: ScanResponse: A response object containing the scanning status and track_id """ # Generate track_id with "scan" prefix for scanning operation track_id = generate_track_id("scan") # Start the scanning process in the background with track_id background_tasks.add_task(run_scanning_process, rag, doc_manager, track_id) return ScanResponse( status="scanning_started", message="Scanning process has been initiated in the background", track_id=track_id, ) @router.post( "/upload", response_model=InsertResponse, dependencies=[Depends(combined_auth)] ) async def upload_to_input_dir( background_tasks: BackgroundTasks, file: UploadFile = File(...) ): """ Upload a file to the input directory and index it. This API endpoint accepts a file through an HTTP POST request, checks if the uploaded file is of a supported type, saves it in the specified input directory, indexes it for retrieval, and returns a success status with relevant details. **File Size Limit:** - Configurable via `MAX_UPLOAD_SIZE` environment variable (default: 100MB) - Set to `None` or `0` for unlimited upload size - Returns HTTP 413 (Request Entity Too Large) if file exceeds limit **Duplicate Detection Behavior:** This endpoint handles two types of duplicate scenarios differently: 1. **Filename Duplicate (Synchronous Detection)**: - Detected immediately before file processing - Returns `status="duplicated"` with the existing document's track_id - Two cases: - If filename exists in document storage: returns existing track_id - If filename exists in file system only: returns empty track_id ("") 2. **Content Duplicate (Asynchronous Detection)**: - Detected during background processing after content extraction - Returns `status="success"` with a new track_id immediately - The duplicate is detected later when processing the file content - Use `/documents/track_status/{track_id}` to check the final result: - Document will have `status="FAILED"` - `error_msg` contains "Content already exists. 
Original doc_id: xxx" - `metadata.is_duplicate=true` with reference to original document - `metadata.original_doc_id` points to the existing document - `metadata.original_track_id` shows the original upload's track_id **Why Different Behavior?** - Filename check is fast (simple lookup), done synchronously - Content extraction is expensive (PDF/DOCX parsing), done asynchronously - This design prevents blocking the client during expensive operations Args: background_tasks: FastAPI BackgroundTasks for async processing file (UploadFile): The file to be uploaded. It must have an allowed extension. Returns: InsertResponse: A response object containing the upload status and a message. - status="success": File accepted and queued for processing - status="duplicated": Filename already exists (see track_id for existing document) Raises: HTTPException: If the file type is not supported (400), file too large (413), or other errors occur (500). """ try: # Sanitize filename to prevent Path Traversal attacks safe_filename = sanitize_filename(file.filename, doc_manager.input_dir) if not doc_manager.is_supported_file(safe_filename): raise HTTPException( status_code=400, detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}", ) # Check file size limit (if configured) if ( global_args.max_upload_size is not None and global_args.max_upload_size > 0 ): # Safe access to file size (not available in older Starlette versions) file_size = getattr(file, "size", None) # Pre-flight size check (only if size is available) if file_size is not None: if file_size > global_args.max_upload_size: raise HTTPException( status_code=413, detail=f"File too large. 
Maximum size: {global_args.max_upload_size / 1024 / 1024:.1f}MB, uploaded: {file_size / 1024 / 1024:.1f}MB", ) else: # If size not available, we'll check during streaming logger.debug( f"File size not available in UploadFile for {safe_filename}, will check during streaming" ) # Check if filename already exists in doc_status storage existing_doc_data = await rag.doc_status.get_doc_by_file_path(safe_filename) if existing_doc_data: # Get document status and track_id from existing document status = existing_doc_data.get("status", "unknown") # Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id) existing_track_id = existing_doc_data.get("track_id") or "" return InsertResponse( status="duplicated", message=f"File '{safe_filename}' already exists in document storage (Status: {status}).", track_id=existing_track_id, ) file_path = doc_manager.input_dir / safe_filename # Check if file already exists in file system if file_path.exists(): return InsertResponse( status="duplicated", message=f"File '{safe_filename}' already exists in the input directory.", track_id="", ) # Async streaming write with size check bytes_written = 0 chunk_size = 1024 * 1024 # 1MB chunks needs_cleanup = False async with aiofiles.open(file_path, "wb") as out_file: while True: # Read chunk from upload stream chunk = await file.read(chunk_size) if not chunk: break # Check size limit during streaming (if not checked before) if ( global_args.max_upload_size is not None and global_args.max_upload_size > 0 ): bytes_written += len(chunk) if bytes_written > global_args.max_upload_size: needs_cleanup = True break # Write chunk to file await out_file.write(chunk) # Cleanup after file is closed if needs_cleanup: try: file_path.unlink() except Exception as cleanup_error: logger.error( f"Error cleaning up oversized file {safe_filename}: {cleanup_error}" ) raise HTTPException( status_code=413, detail=f"File too large. 
Maximum size: {global_args.max_upload_size / 1024 / 1024:.1f}MB, uploaded: {bytes_written / 1024 / 1024:.1f}MB", ) track_id = generate_track_id("upload") # Add to background tasks and get track_id background_tasks.add_task(pipeline_index_file, rag, file_path, track_id) return InsertResponse( status="success", message=f"File '{safe_filename}' uploaded successfully. Processing will continue in background.", track_id=track_id, ) except HTTPException: # Re-raise HTTP exceptions (400, 413, etc.) raise except Exception as e: logger.error(f"Error /documents/upload: {file.filename}: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post( "/text", response_model=InsertResponse, dependencies=[Depends(combined_auth)] ) async def insert_text( request: InsertTextRequest, background_tasks: BackgroundTasks ): """ Insert text into the RAG system. This endpoint allows you to insert text data into the RAG system for later retrieval and use in generating responses. Args: request (InsertTextRequest): The request body containing the text to be inserted. background_tasks: FastAPI BackgroundTasks for async processing Returns: InsertResponse: A response object containing the status of the operation. Raises: HTTPException: If an error occurs during text processing (500). 
""" try: # Check if file_source already exists in doc_status storage if ( request.file_source and request.file_source.strip() and request.file_source != "unknown_source" ): existing_doc_data = await rag.doc_status.get_doc_by_file_path( request.file_source ) if existing_doc_data: # Get document status and track_id from existing document status = existing_doc_data.get("status", "unknown") # Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id) existing_track_id = existing_doc_data.get("track_id") or "" return InsertResponse( status="duplicated", message=f"File source '{request.file_source}' already exists in document storage (Status: {status}).", track_id=existing_track_id, ) # Check if content already exists by computing content hash (doc_id) sanitized_text = sanitize_text_for_encoding(request.text) content_doc_id = compute_mdhash_id(sanitized_text, prefix="doc-") existing_doc = await rag.doc_status.get_by_id(content_doc_id) if existing_doc: # Content already exists, return duplicated with existing track_id status = existing_doc.get("status", "unknown") existing_track_id = existing_doc.get("track_id") or "" return InsertResponse( status="duplicated", message=f"Identical content already exists in document storage (doc_id: {content_doc_id}, Status: {status}).", track_id=existing_track_id, ) # Generate track_id for text insertion track_id = generate_track_id("insert") background_tasks.add_task( pipeline_index_texts, rag, [request.text], file_sources=[request.file_source], track_id=track_id, ) return InsertResponse( status="success", message="Text successfully received. 
Processing will continue in background.", track_id=track_id, ) except Exception as e: logger.error(f"Error /documents/text: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post( "/texts", response_model=InsertResponse, dependencies=[Depends(combined_auth)], ) async def insert_texts( request: InsertTextsRequest, background_tasks: BackgroundTasks ): """ Insert multiple texts into the RAG system. This endpoint allows you to insert multiple text entries into the RAG system in a single request. Args: request (InsertTextsRequest): The request body containing the list of texts. background_tasks: FastAPI BackgroundTasks for async processing Returns: InsertResponse: A response object containing the status of the operation. Raises: HTTPException: If an error occurs during text processing (500). """ try: # Check if any file_sources already exist in doc_status storage if request.file_sources: for file_source in request.file_sources: if ( file_source and file_source.strip() and file_source != "unknown_source" ): existing_doc_data = await rag.doc_status.get_doc_by_file_path( file_source ) if existing_doc_data: # Get document status and track_id from existing document status = existing_doc_data.get("status", "unknown") # Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id) existing_track_id = existing_doc_data.get("track_id") or "" return InsertResponse( status="duplicated", message=f"File source '{file_source}' already exists in document storage (Status: {status}).", track_id=existing_track_id, ) # Check if any content already exists by computing content hash (doc_id) for text in request.texts: sanitized_text = sanitize_text_for_encoding(text) content_doc_id = compute_mdhash_id(sanitized_text, prefix="doc-") existing_doc = await rag.doc_status.get_by_id(content_doc_id) if existing_doc: # Content already exists, return duplicated with existing track_id status = 
existing_doc.get("status", "unknown") existing_track_id = existing_doc.get("track_id") or "" return InsertResponse( status="duplicated", message=f"Identical content already exists in document storage (doc_id: {content_doc_id}, Status: {status}).", track_id=existing_track_id, ) # Generate track_id for texts insertion track_id = generate_track_id("insert") background_tasks.add_task( pipeline_index_texts, rag, request.texts, file_sources=request.file_sources, track_id=track_id, ) return InsertResponse( status="success", message="Texts successfully received. Processing will continue in background.", track_id=track_id, ) except Exception as e: logger.error(f"Error /documents/texts: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.delete( "", response_model=ClearDocumentsResponse, dependencies=[Depends(combined_auth)] ) async def clear_documents(): """ Clear all documents from the RAG system. This endpoint deletes all documents, entities, relationships, and files from the system. It uses the storage drop methods to properly clean up all data and removes all files from the input directory. Returns: ClearDocumentsResponse: A response object containing the status and message. - status="success": All documents and files were successfully cleared. - status="partial_success": Document clear job exit with some errors. - status="busy": Operation could not be completed because the pipeline is busy. - status="fail": All storage drop operations failed, with message - message: Detailed information about the operation results, including counts of deleted files and any errors encountered. Raises: HTTPException: Raised when a serious error occurs during the clearing process, with status code 500 and error details in the detail field. 
""" from lightrag.kg.shared_storage import ( get_namespace_data, get_namespace_lock, ) # Get pipeline status and lock pipeline_status = await get_namespace_data( "pipeline_status", workspace=rag.workspace ) pipeline_status_lock = get_namespace_lock( "pipeline_status", workspace=rag.workspace ) # Check and set status with lock async with pipeline_status_lock: if pipeline_status.get("busy", False): return ClearDocumentsResponse( status="busy", message="Cannot clear documents while pipeline is busy", ) # Set busy to true pipeline_status.update( { "busy": True, "job_name": "Clearing Documents", "job_start": datetime.now().isoformat(), "docs": 0, "batchs": 0, "cur_batch": 0, "request_pending": False, # Clear any previous request "latest_message": "Starting document clearing process", } ) # Cleaning history_messages without breaking it as a shared list object del pipeline_status["history_messages"][:] pipeline_status["history_messages"].append( "Starting document clearing process" ) try: # Use drop method to clear all data drop_tasks = [] storages = [ rag.text_chunks, rag.full_docs, rag.full_entities, rag.full_relations, rag.entity_chunks, rag.relation_chunks, rag.entities_vdb, rag.relationships_vdb, rag.chunks_vdb, rag.chunk_entity_relation_graph, rag.doc_status, ] # Log storage drop start if "history_messages" in pipeline_status: pipeline_status["history_messages"].append( "Starting to drop storage components" ) for storage in storages: if storage is not None: drop_tasks.append(storage.drop()) # Wait for all drop tasks to complete drop_results = await asyncio.gather(*drop_tasks, return_exceptions=True) # Check for errors and log results errors = [] storage_success_count = 0 storage_error_count = 0 for i, result in enumerate(drop_results): storage_name = storages[i].__class__.__name__ if isinstance(result, Exception): error_msg = f"Error dropping {storage_name}: {str(result)}" errors.append(error_msg) logger.error(error_msg) storage_error_count += 1 else: namespace = 
storages[i].namespace workspace = storages[i].workspace logger.info( f"Successfully dropped {storage_name}: {workspace}/{namespace}" ) storage_success_count += 1 # Log storage drop results if "history_messages" in pipeline_status: if storage_error_count > 0: pipeline_status["history_messages"].append( f"Dropped {storage_success_count} storage components with {storage_error_count} errors" ) else: pipeline_status["history_messages"].append( f"Successfully dropped all {storage_success_count} storage components" ) # If all storage operations failed, return error status and don't proceed with file deletion if storage_success_count == 0 and storage_error_count > 0: error_message = "All storage drop operations failed. Aborting document clearing process." logger.error(error_message) if "history_messages" in pipeline_status: pipeline_status["history_messages"].append(error_message) return ClearDocumentsResponse(status="fail", message=error_message) # Log file deletion start if "history_messages" in pipeline_status: pipeline_status["history_messages"].append( "Starting to delete files in input directory" ) # Delete only files in the current directory, preserve files in subdirectories deleted_files_count = 0 file_errors_count = 0 for file_path in doc_manager.input_dir.glob("*"): if file_path.is_file(): try: file_path.unlink() deleted_files_count += 1 except Exception as e: logger.error(f"Error deleting file {file_path}: {str(e)}") file_errors_count += 1 # Log file deletion results if "history_messages" in pipeline_status: if file_errors_count > 0: pipeline_status["history_messages"].append( f"Deleted {deleted_files_count} files with {file_errors_count} errors" ) errors.append(f"Failed to delete {file_errors_count} files") else: pipeline_status["history_messages"].append( f"Successfully deleted {deleted_files_count} files" ) # Prepare final result message final_message = "" if errors: final_message = f"Cleared documents with some errors. Deleted {deleted_files_count} files." 
status = "partial_success" else: final_message = f"All documents cleared successfully. Deleted {deleted_files_count} files." status = "success" # Log final result if "history_messages" in pipeline_status: pipeline_status["history_messages"].append(final_message) # Return response based on results return ClearDocumentsResponse(status=status, message=final_message) except Exception as e: error_msg = f"Error clearing documents: {str(e)}" logger.error(error_msg) logger.error(traceback.format_exc()) if "history_messages" in pipeline_status: pipeline_status["history_messages"].append(error_msg) raise HTTPException(status_code=500, detail=str(e)) finally: # Reset busy status after completion async with pipeline_status_lock: pipeline_status["busy"] = False completion_msg = "Document clearing process completed" pipeline_status["latest_message"] = completion_msg if "history_messages" in pipeline_status: pipeline_status["history_messages"].append(completion_msg) @router.get( "/pipeline_status", dependencies=[Depends(combined_auth)], response_model=PipelineStatusResponse, ) async def get_pipeline_status() -> PipelineStatusResponse: """ Get the current status of the document indexing pipeline. This endpoint returns information about the current state of the document processing pipeline, including the processing status, progress information, and history messages. 
Returns: PipelineStatusResponse: A response object containing: - autoscanned (bool): Whether auto-scan has started - busy (bool): Whether the pipeline is currently busy - job_name (str): Current job name (e.g., indexing files/indexing texts) - job_start (str, optional): Job start time as ISO format string - docs (int): Total number of documents to be indexed - batchs (int): Number of batches for processing documents - cur_batch (int): Current processing batch - request_pending (bool): Flag for pending request for processing - latest_message (str): Latest message from pipeline processing - history_messages (List[str], optional): List of history messages (limited to latest 1000 entries, with truncation message if more than 1000 messages exist) Raises: HTTPException: If an error occurs while retrieving pipeline status (500) """ try: from lightrag.kg.shared_storage import ( get_namespace_data, get_namespace_lock, get_all_update_flags_status, ) pipeline_status = await get_namespace_data( "pipeline_status", workspace=rag.workspace ) pipeline_status_lock = get_namespace_lock( "pipeline_status", workspace=rag.workspace ) # Get update flags status for all namespaces update_status = await get_all_update_flags_status(workspace=rag.workspace) # Convert MutableBoolean objects to regular boolean values processed_update_status = {} for namespace, flags in update_status.items(): processed_flags = [] for flag in flags: # Handle both multiprocess and single process cases if hasattr(flag, "value"): processed_flags.append(bool(flag.value)) else: processed_flags.append(bool(flag)) processed_update_status[namespace] = processed_flags async with pipeline_status_lock: # Convert to regular dict if it's a Manager.dict status_dict = dict(pipeline_status) # Add processed update_status to the status dictionary status_dict["update_status"] = processed_update_status # Convert history_messages to a regular list if it's a Manager.list # and limit to latest 1000 entries with truncation message if 
needed if "history_messages" in status_dict: history_list = list(status_dict["history_messages"]) total_count = len(history_list) if total_count > 1000: # Calculate truncated message count truncated_count = total_count - 1000 # Take only the latest 1000 messages latest_messages = history_list[-1000:] # Add truncation message at the beginning truncation_message = ( f"[Truncated history messages: {truncated_count}/{total_count}]" ) status_dict["history_messages"] = [ truncation_message ] + latest_messages else: # No truncation needed, return all messages status_dict["history_messages"] = history_list # Ensure job_start is properly formatted as a string with timezone information if "job_start" in status_dict and status_dict["job_start"]: # Use format_datetime to ensure consistent formatting status_dict["job_start"] = format_datetime(status_dict["job_start"]) return PipelineStatusResponse(**status_dict) except Exception as e: logger.error(f"Error getting pipeline status: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) # TODO: Deprecated, use /documents/paginated instead @router.get( "", response_model=DocsStatusesResponse, dependencies=[Depends(combined_auth)] ) async def documents() -> DocsStatusesResponse: """ Get the status of all documents in the system. This endpoint is deprecated; use /documents/paginated instead. To prevent excessive resource consumption, a maximum of 1,000 records is returned. This endpoint retrieves the current status of all documents, grouped by their processing status (PENDING, PROCESSING, PREPROCESSED, PROCESSED, FAILED). The results are limited to 1000 total documents with fair distribution across all statuses. Returns: DocsStatusesResponse: A response object containing a dictionary where keys are DocStatus values and values are lists of DocStatusResponse objects representing documents in each status category. Maximum 1000 documents total will be returned. 
Raises: HTTPException: If an error occurs while retrieving document statuses (500). """ try: statuses = ( DocStatus.PENDING, DocStatus.PROCESSING, DocStatus.PREPROCESSED, DocStatus.PROCESSED, DocStatus.FAILED, ) tasks = [rag.get_docs_by_status(status) for status in statuses] results: List[Dict[str, DocProcessingStatus]] = await asyncio.gather(*tasks) response = DocsStatusesResponse() total_documents = 0 max_documents = 1000 # Convert results to lists for easier processing status_documents = [] for idx, result in enumerate(results): status = statuses[idx] docs_list = [] for doc_id, doc_status in result.items(): docs_list.append((doc_id, doc_status)) status_documents.append((status, docs_list)) # Fair distribution: round-robin across statuses status_indices = [0] * len( status_documents ) # Track current index for each status current_status_idx = 0 while total_documents < max_documents: # Check if we have any documents left to process has_remaining = False for status_idx, (status, docs_list) in enumerate(status_documents): if status_indices[status_idx] < len(docs_list): has_remaining = True break if not has_remaining: break # Try to get a document from the current status status, docs_list = status_documents[current_status_idx] current_index = status_indices[current_status_idx] if current_index < len(docs_list): doc_id, doc_status = docs_list[current_index] if status not in response.statuses: response.statuses[status] = [] response.statuses[status].append( DocStatusResponse( id=doc_id, content_summary=doc_status.content_summary, content_length=doc_status.content_length, status=doc_status.status, created_at=format_datetime(doc_status.created_at), updated_at=format_datetime(doc_status.updated_at), track_id=doc_status.track_id, chunks_count=doc_status.chunks_count, error_msg=doc_status.error_msg, metadata=doc_status.metadata, file_path=normalize_file_path(doc_status.file_path), ) ) status_indices[current_status_idx] += 1 total_documents += 1 # Move to next status 
(round-robin) current_status_idx = (current_status_idx + 1) % len(status_documents) return response except Exception as e: logger.error(f"Error GET /documents: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) class DeleteDocByIdResponse(BaseModel): """Response model for single document deletion operation.""" status: Literal["deletion_started", "busy", "not_allowed"] = Field( description="Status of the deletion operation" ) message: str = Field(description="Message describing the operation result") doc_id: str = Field(description="The ID of the document to delete") @router.delete( "/delete_document", response_model=DeleteDocByIdResponse, dependencies=[Depends(combined_auth)], summary="Delete a document and all its associated data by its ID.", ) async def delete_document( delete_request: DeleteDocRequest, background_tasks: BackgroundTasks, ) -> DeleteDocByIdResponse: """ Delete documents and all their associated data by their IDs using background processing. Deletes specific documents and all their associated data, including their status, text chunks, vector embeddings, and any related graph data. When requested, cached LLM extraction responses are removed after graph deletion/rebuild completes. The deletion process runs in the background to avoid blocking the client connection. This operation is irreversible and will interact with the pipeline status. Args: delete_request (DeleteDocRequest): The request containing the document IDs and deletion options. background_tasks: FastAPI BackgroundTasks for async processing Returns: DeleteDocByIdResponse: The result of the deletion operation. - status="deletion_started": The document deletion has been initiated in the background. - status="busy": The pipeline is busy with another operation. Raises: HTTPException: - 500: If an unexpected internal error occurs during initialization. 
""" doc_ids = delete_request.doc_ids try: from lightrag.kg.shared_storage import ( get_namespace_data, get_namespace_lock, ) pipeline_status = await get_namespace_data( "pipeline_status", workspace=rag.workspace ) pipeline_status_lock = get_namespace_lock( "pipeline_status", workspace=rag.workspace ) # Check if pipeline is busy with proper lock async with pipeline_status_lock: if pipeline_status.get("busy", False): return DeleteDocByIdResponse( status="busy", message="Cannot delete documents while pipeline is busy", doc_id=", ".join(doc_ids), ) # Add deletion task to background tasks background_tasks.add_task( background_delete_documents, rag, doc_manager, doc_ids, delete_request.delete_file, delete_request.delete_llm_cache, ) return DeleteDocByIdResponse( status="deletion_started", message=f"Document deletion for {len(doc_ids)} documents has been initiated. Processing will continue in background.", doc_id=", ".join(doc_ids), ) except Exception as e: error_msg = f"Error initiating document deletion for {delete_request.doc_ids}: {str(e)}" logger.error(error_msg) logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=error_msg) @router.post( "/clear_cache", response_model=ClearCacheResponse, dependencies=[Depends(combined_auth)], ) async def clear_cache(request: ClearCacheRequest): """ Clear all cache data from the LLM response cache storage. This endpoint clears all cached LLM responses regardless of mode. The request body is accepted for API compatibility but is ignored. Args: request (ClearCacheRequest): The request body (ignored for compatibility). Returns: ClearCacheResponse: A response object containing the status and message. Raises: HTTPException: If an error occurs during cache clearing (500). 
""" try: # Call the aclear_cache method (no modes parameter) await rag.aclear_cache() # Prepare success message message = "Successfully cleared all cache" return ClearCacheResponse(status="success", message=message) except Exception as e: logger.error(f"Error clearing cache: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.delete( "/delete_entity", response_model=DeletionResult, dependencies=[Depends(combined_auth)], ) async def delete_entity(request: DeleteEntityRequest): """ Delete an entity and all its relationships from the knowledge graph. Args: request (DeleteEntityRequest): The request body containing the entity name. Returns: DeletionResult: An object containing the outcome of the deletion process. Raises: HTTPException: If the entity is not found (404) or an error occurs (500). """ try: result = await rag.adelete_by_entity(entity_name=request.entity_name) if result.status == "not_found": raise HTTPException(status_code=404, detail=result.message) if result.status == "fail": raise HTTPException(status_code=500, detail=result.message) # Set doc_id to empty string since this is an entity operation, not document result.doc_id = "" return result except HTTPException: raise except Exception as e: error_msg = f"Error deleting entity '{request.entity_name}': {str(e)}" logger.error(error_msg) logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=error_msg) @router.delete( "/delete_relation", response_model=DeletionResult, dependencies=[Depends(combined_auth)], ) async def delete_relation(request: DeleteRelationRequest): """ Delete a relationship between two entities from the knowledge graph. Args: request (DeleteRelationRequest): The request body containing the source and target entity names. Returns: DeletionResult: An object containing the outcome of the deletion process. Raises: HTTPException: If the relation is not found (404) or an error occurs (500). 
""" try: result = await rag.adelete_by_relation( source_entity=request.source_entity, target_entity=request.target_entity, ) if result.status == "not_found": raise HTTPException(status_code=404, detail=result.message) if result.status == "fail": raise HTTPException(status_code=500, detail=result.message) # Set doc_id to empty string since this is a relation operation, not document result.doc_id = "" return result except HTTPException: raise except Exception as e: error_msg = f"Error deleting relation from '{request.source_entity}' to '{request.target_entity}': {str(e)}" logger.error(error_msg) logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=error_msg) @router.get( "/track_status/{track_id}", response_model=TrackStatusResponse, dependencies=[Depends(combined_auth)], ) async def get_track_status(track_id: str) -> TrackStatusResponse: """ Get the processing status of documents by tracking ID. This endpoint retrieves all documents associated with a specific tracking ID, allowing users to monitor the processing progress of their uploaded files or inserted texts. Args: track_id (str): The tracking ID returned from upload, text, or texts endpoints Returns: TrackStatusResponse: A response object containing: - track_id: The tracking ID - documents: List of documents associated with this track_id - total_count: Total number of documents for this track_id Raises: HTTPException: If track_id is invalid (400) or an error occurs (500). 
""" try: # Validate track_id if not track_id or not track_id.strip(): raise HTTPException(status_code=400, detail="Track ID cannot be empty") track_id = track_id.strip() # Get documents by track_id docs_by_track_id = await rag.aget_docs_by_track_id(track_id) # Convert to response format documents = [] status_summary = {} for doc_id, doc_status in docs_by_track_id.items(): documents.append( DocStatusResponse( id=doc_id, content_summary=doc_status.content_summary, content_length=doc_status.content_length, status=doc_status.status, created_at=format_datetime(doc_status.created_at), updated_at=format_datetime(doc_status.updated_at), track_id=doc_status.track_id, chunks_count=doc_status.chunks_count, error_msg=doc_status.error_msg, metadata=doc_status.metadata, file_path=normalize_file_path(doc_status.file_path), ) ) # Build status summary # Handle both DocStatus enum and string cases for robust deserialization status_key = str(doc_status.status) status_summary[status_key] = status_summary.get(status_key, 0) + 1 return TrackStatusResponse( track_id=track_id, documents=documents, total_count=len(documents), status_summary=status_summary, ) except HTTPException: raise except Exception as e: logger.error(f"Error getting track status for {track_id}: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post( "/paginated", response_model=PaginatedDocsResponse, dependencies=[Depends(combined_auth)], ) async def get_documents_paginated( request: DocumentsRequest, ) -> PaginatedDocsResponse: """ Get documents with pagination support. This endpoint retrieves documents with pagination, filtering, and sorting capabilities. It provides better performance for large document collections by loading only the requested page of data. 
Args: request (DocumentsRequest): The request body containing pagination parameters Returns: PaginatedDocsResponse: A response object containing: - documents: List of documents for the current page - pagination: Pagination information (page, total_count, etc.) - status_counts: Count of documents by status for all documents Raises: HTTPException: If an error occurs while retrieving documents (500). """ try: # Get paginated documents and status counts in parallel docs_task = rag.doc_status.get_docs_paginated( status_filter=request.status_filter, page=request.page, page_size=request.page_size, sort_field=request.sort_field, sort_direction=request.sort_direction, ) status_counts_task = rag.doc_status.get_all_status_counts() # Execute both queries in parallel (documents_with_ids, total_count), status_counts = await asyncio.gather( docs_task, status_counts_task ) # Convert documents to response format doc_responses = [] for doc_id, doc in documents_with_ids: doc_responses.append( DocStatusResponse( id=doc_id, content_summary=doc.content_summary, content_length=doc.content_length, status=doc.status, created_at=format_datetime(doc.created_at), updated_at=format_datetime(doc.updated_at), track_id=doc.track_id, chunks_count=doc.chunks_count, error_msg=doc.error_msg, metadata=doc.metadata, file_path=normalize_file_path(doc.file_path), ) ) # Calculate pagination info total_pages = (total_count + request.page_size - 1) // request.page_size has_next = request.page < total_pages has_prev = request.page > 1 pagination = PaginationInfo( page=request.page, page_size=request.page_size, total_count=total_count, total_pages=total_pages, has_next=has_next, has_prev=has_prev, ) return PaginatedDocsResponse( documents=doc_responses, pagination=pagination, status_counts=status_counts, ) except Exception as e: logger.error(f"Error getting paginated documents: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.get( "/status_counts", 
response_model=StatusCountsResponse, dependencies=[Depends(combined_auth)], ) async def get_document_status_counts() -> StatusCountsResponse: """ Get counts of documents by status. This endpoint retrieves the count of documents in each processing status (PENDING, PROCESSING, PROCESSED, FAILED) for all documents in the system. Returns: StatusCountsResponse: A response object containing status counts Raises: HTTPException: If an error occurs while retrieving status counts (500). """ try: status_counts = await rag.doc_status.get_all_status_counts() return StatusCountsResponse(status_counts=status_counts) except Exception as e: logger.error(f"Error getting document status counts: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post( "/reprocess_failed", response_model=ReprocessResponse, dependencies=[Depends(combined_auth)], ) async def reprocess_failed_documents(background_tasks: BackgroundTasks): """ Reprocess failed and pending documents. This endpoint triggers the document processing pipeline which automatically picks up and reprocesses documents in the following statuses: - FAILED: Documents that failed during previous processing attempts - PENDING: Documents waiting to be processed - PROCESSING: Documents with abnormally terminated processing (e.g., server crashes) This is useful for recovering from server crashes, network errors, LLM service outages, or other temporary failures that caused document processing to fail. The processing happens in the background and can be monitored by checking the pipeline status. The reprocessed documents retain their original track_id from initial upload, so use their original track_id to monitor progress. Returns: ReprocessResponse: Response with status and message. track_id is always empty string because reprocessed documents retain their original track_id from initial upload. Raises: HTTPException: If an error occurs while initiating reprocessing (500). 
""" try: # Start the reprocessing in the background # Note: Reprocessed documents retain their original track_id from initial upload background_tasks.add_task(rag.apipeline_process_enqueue_documents) logger.info("Reprocessing of failed documents initiated") return ReprocessResponse( status="reprocessing_started", message="Reprocessing of failed documents has been initiated in background. Documents retain their original track_id.", ) except Exception as e: logger.error(f"Error initiating reprocessing of failed documents: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) @router.post( "/cancel_pipeline", response_model=CancelPipelineResponse, dependencies=[Depends(combined_auth)], ) async def cancel_pipeline(): """ Request cancellation of the currently running pipeline. This endpoint sets a cancellation flag in the pipeline status. The pipeline will: 1. Check this flag at key processing points 2. Stop processing new documents 3. Cancel all running document processing tasks 4. Mark all PROCESSING documents as FAILED with reason "User cancelled" The cancellation is graceful and ensures data consistency. Documents that have completed processing will remain in PROCESSED status. Returns: CancelPipelineResponse: Response with status and message - status="cancellation_requested": Cancellation flag has been set - status="not_busy": Pipeline is not currently running Raises: HTTPException: If an error occurs while setting cancellation flag (500). """ try: from lightrag.kg.shared_storage import ( get_namespace_data, get_namespace_lock, ) pipeline_status = await get_namespace_data( "pipeline_status", workspace=rag.workspace ) pipeline_status_lock = get_namespace_lock( "pipeline_status", workspace=rag.workspace ) async with pipeline_status_lock: if not pipeline_status.get("busy", False): return CancelPipelineResponse( status="not_busy", message="Pipeline is not currently running. 
No cancellation needed.", ) # Set cancellation flag pipeline_status["cancellation_requested"] = True cancel_msg = "Pipeline cancellation requested by user" logger.info(cancel_msg) pipeline_status["latest_message"] = cancel_msg pipeline_status["history_messages"].append(cancel_msg) return CancelPipelineResponse( status="cancellation_requested", message="Pipeline cancellation has been requested. Documents will be marked as FAILED.", ) except Exception as e: logger.error(f"Error requesting pipeline cancellation: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) return router ================================================ FILE: lightrag/api/routers/graph_routes.py ================================================ """ This module contains all graph-related routes for the LightRAG API. """ from typing import Optional, Dict, Any import traceback from fastapi import APIRouter, Depends, Query, HTTPException from pydantic import BaseModel, Field from lightrag.utils import logger from ..utils_api import get_combined_auth_dependency router = APIRouter(tags=["graph"]) class EntityUpdateRequest(BaseModel): entity_name: str updated_data: Dict[str, Any] allow_rename: bool = False allow_merge: bool = False class RelationUpdateRequest(BaseModel): source_id: str target_id: str updated_data: Dict[str, Any] class EntityMergeRequest(BaseModel): entities_to_change: list[str] = Field( ..., description="List of entity names to be merged and deleted. These are typically duplicate or misspelled entities.", min_length=1, examples=[["Elon Msk", "Ellon Musk"]], ) entity_to_change_into: str = Field( ..., description="Target entity name that will receive all relationships from the source entities. 
This entity will be preserved.", min_length=1, examples=["Elon Musk"], ) class EntityCreateRequest(BaseModel): entity_name: str = Field( ..., description="Unique name for the new entity", min_length=1, examples=["Tesla"], ) entity_data: Dict[str, Any] = Field( ..., description="Dictionary containing entity properties. Common fields include 'description' and 'entity_type'.", examples=[ { "description": "Electric vehicle manufacturer", "entity_type": "ORGANIZATION", } ], ) class RelationCreateRequest(BaseModel): source_entity: str = Field( ..., description="Name of the source entity. This entity must already exist in the knowledge graph.", min_length=1, examples=["Elon Musk"], ) target_entity: str = Field( ..., description="Name of the target entity. This entity must already exist in the knowledge graph.", min_length=1, examples=["Tesla"], ) relation_data: Dict[str, Any] = Field( ..., description="Dictionary containing relationship properties. Common fields include 'description', 'keywords', and 'weight'.", examples=[ { "description": "Elon Musk is the CEO of Tesla", "keywords": "CEO, founder", "weight": 1.0, } ], ) def create_graph_routes(rag, api_key: Optional[str] = None): combined_auth = get_combined_auth_dependency(api_key) @router.get("/graph/label/list", dependencies=[Depends(combined_auth)]) async def get_graph_labels(): """ Get all graph labels Returns: List[str]: List of graph labels """ try: return await rag.get_graph_labels() except Exception as e: logger.error(f"Error getting graph labels: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException( status_code=500, detail=f"Error getting graph labels: {str(e)}" ) @router.get("/graph/label/popular", dependencies=[Depends(combined_auth)]) async def get_popular_labels( limit: int = Query( 300, description="Maximum number of popular labels to return", ge=1, le=1000 ), ): """ Get popular labels by node degree (most connected entities) Args: limit (int): Maximum number of labels to return (default: 
300, max: 1000) Returns: List[str]: List of popular labels sorted by degree (highest first) """ try: return await rag.chunk_entity_relation_graph.get_popular_labels(limit) except Exception as e: logger.error(f"Error getting popular labels: {str(e)}") logger.error(traceback.format_exc()) raise HTTPException( status_code=500, detail=f"Error getting popular labels: {str(e)}" ) @router.get("/graph/label/search", dependencies=[Depends(combined_auth)]) async def search_labels( q: str = Query(..., description="Search query string"), limit: int = Query( 50, description="Maximum number of search results to return", ge=1, le=100 ), ): """ Search labels with fuzzy matching Args: q (str): Search query string limit (int): Maximum number of results to return (default: 50, max: 100) Returns: List[str]: List of matching labels sorted by relevance """ try: return await rag.chunk_entity_relation_graph.search_labels(q, limit) except Exception as e: logger.error(f"Error searching labels with query '{q}': {str(e)}") logger.error(traceback.format_exc()) raise HTTPException( status_code=500, detail=f"Error searching labels: {str(e)}" ) @router.get("/graphs", dependencies=[Depends(combined_auth)]) async def get_knowledge_graph( label: str = Query(..., description="Label to get knowledge graph for"), max_depth: int = Query(3, description="Maximum depth of graph", ge=1), max_nodes: int = Query(1000, description="Maximum nodes to return", ge=1), ): """ Retrieve a connected subgraph of nodes where the label includes the specified label. When reducing the number of nodes, the prioritization criteria are as follows: 1. Hops(path) to the starting node take precedence 2.
Followed by the degree of the nodes Args: label (str): Label of the starting node max_depth (int, optional): Maximum depth of the subgraph. Defaults to 3 max_nodes (int, optional): Maximum number of nodes to return. Defaults to 1000 Returns: Dict[str, List[str]]: Knowledge graph for label Raises: HTTPException: If retrieving the knowledge graph fails (500) """ try: # Log the label parameter to check for leading spaces logger.debug( f"get_knowledge_graph called with label: '{label}' (length: {len(label)}, repr: {repr(label)})" ) return await rag.get_knowledge_graph( node_label=label, max_depth=max_depth, max_nodes=max_nodes, ) except Exception as e: logger.error(f"Error getting knowledge graph for label '{label}': {str(e)}") logger.error(traceback.format_exc()) raise HTTPException( status_code=500, detail=f"Error getting knowledge graph: {str(e)}" ) @router.get("/graph/entity/exists", dependencies=[Depends(combined_auth)]) async def check_entity_exists( name: str = Query(..., description="Entity name to check"), ): """ Check if an entity with the given name exists in the knowledge graph Args: name (str): Name of the entity to check Returns: Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists """ try: exists = await rag.chunk_entity_relation_graph.has_node(name) return {"exists": exists} except Exception as e: logger.error(f"Error checking entity existence for '{name}': {str(e)}") logger.error(traceback.format_exc()) raise HTTPException( status_code=500, detail=f"Error checking entity existence: {str(e)}" ) @router.post("/graph/entity/edit", dependencies=[Depends(combined_auth)]) async def update_entity(request: EntityUpdateRequest): """ Update an entity's properties in the knowledge graph This endpoint allows updating entity properties, including renaming entities.
When renaming to an existing entity name, the behavior depends on allow_merge: Args: request (EntityUpdateRequest): Request containing: - entity_name (str): Name of the entity to update - updated_data (Dict[str, Any]): Dictionary of properties to update - allow_rename (bool): Whether to allow entity renaming (default: False) - allow_merge (bool): Whether to merge into existing entity when renaming causes name conflict (default: False) Returns: Dict with the following structure: { "status": "success", "message": "Entity updated successfully" | "Entity merged successfully into 'target_name'", "data": { "entity_name": str, # Final entity name "description": str, # Entity description "entity_type": str, # Entity type "source_id": str, # Source chunk IDs ... # Other entity properties }, "operation_summary": { "merged": bool, # Whether entity was merged into another "merge_status": str, # "success" | "failed" | "not_attempted" "merge_error": str | None, # Error message if merge failed "operation_status": str, # "success" | "partial_success" | "failure" "target_entity": str | None, # Target entity name if renaming/merging "final_entity": str, # Final entity name after operation "renamed": bool # Whether entity was renamed } } operation_status values explained: - "success": All operations completed successfully * For simple updates: entity properties updated * For renames: entity renamed successfully * For merges: non-name updates applied AND merge completed - "partial_success": Update succeeded but merge failed * Non-name property updates were applied successfully * Merge operation failed (entity not merged) * Original entity still exists with updated properties * Use merge_error for failure details - "failure": Operation failed completely * If merge_status == "failed": Merge attempted but both update and merge failed * If merge_status == "not_attempted": Regular update failed * No changes were applied to the entity merge_status values explained: - "success": Entity 
successfully merged into target entity - "failed": Merge operation was attempted but failed - "not_attempted": No merge was attempted (normal update/rename) Behavior when renaming to an existing entity: - If allow_merge=False: Raises ValueError with 400 status (default behavior) - If allow_merge=True: Automatically merges the source entity into the existing target entity, preserving all relationships and applying non-name updates first Example Request (simple update): POST /graph/entity/edit { "entity_name": "Tesla", "updated_data": {"description": "Updated description"}, "allow_rename": false, "allow_merge": false } Example Response (simple update success): { "status": "success", "message": "Entity updated successfully", "data": { ... }, "operation_summary": { "merged": false, "merge_status": "not_attempted", "merge_error": null, "operation_status": "success", "target_entity": null, "final_entity": "Tesla", "renamed": false } } Example Request (rename with auto-merge): POST /graph/entity/edit { "entity_name": "Elon Msk", "updated_data": { "entity_name": "Elon Musk", "description": "Corrected description" }, "allow_rename": true, "allow_merge": true } Example Response (merge success): { "status": "success", "message": "Entity merged successfully into 'Elon Musk'", "data": { ... }, "operation_summary": { "merged": true, "merge_status": "success", "merge_error": null, "operation_status": "success", "target_entity": "Elon Musk", "final_entity": "Elon Musk", "renamed": true } } Example Response (partial success - update succeeded but merge failed): { "status": "success", "message": "Entity updated successfully", "data": { ... 
@router.post("/graph/relation/edit", dependencies=[Depends(combined_auth)])
async def update_relation(request: RelationUpdateRequest):
    """Update a relation's properties in the knowledge graph

    Args:
        request (RelationUpdateRequest): Carries the source ID, the target ID
            and the properties to update

    Returns:
        Dict: "status", "message" and, under "data", the updated relation
        information returned by the edit call

    Raises:
        HTTPException: 400 when the edit raises ValueError, 500 for any other failure
    """
    try:
        edited = await rag.aedit_relation(
            source_entity=request.source_id,
            target_entity=request.target_id,
            updated_data=request.updated_data,
        )
        payload = {
            "status": "success",
            "message": "Relation updated successfully",
            "data": edited,
        }
    except ValueError as ve:
        # Validation problems are the caller's fault -> 400
        logger.error(
            f"Validation error updating relation between '{request.source_id}' and '{request.target_id}': {str(ve)}"
        )
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.error(
            f"Error updating relation between '{request.source_id}' and '{request.target_id}': {str(e)}"
        )
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Error updating relation: {str(e)}"
        )
    return payload
@router.post("/graph/relation/create", dependencies=[Depends(combined_auth)])
async def create_relation(request: RelationCreateRequest):
    """
    Create a new relationship between two entities in the knowledge graph

    The relationship created here is undirected. The source/target order in the
    request is accepted for convenience only: the stored edge has no direction and
    may come back with the two entities swapped. Both entities must already exist
    in the knowledge graph. Vector embeddings are generated automatically for the
    relationship so it can be used in semantic search and graph traversal.

    Prerequisites:
        - Both source_entity and target_entity must exist in the knowledge graph
        - Use /graph/entity/create to create entities first if they don't exist

    Request Body:
        source_entity (str): Name of the source entity (relationship origin)
        target_entity (str): Name of the target entity (relationship destination)
        relation_data (dict): Relationship properties including:
            - description (str): Textual description of the relationship
            - keywords (str): Comma-separated keywords describing the relationship type
            - source_id (str): Related chunk_id from which the description originates
            - weight (float): Relationship strength/importance (default: 1.0)
            - Additional custom properties as needed

    Response Schema:
        {
            "status": "success",
            "message": "Relation created successfully between 'Elon Musk' and 'Tesla'",
            "data": {
                "src_id": "Elon Musk",
                "tgt_id": "Tesla",
                "description": "Elon Musk is the CEO of Tesla",
                "keywords": "CEO, founder",
                "source_id": "chunk-123chunk-456",
                "weight": 1.0,
                ... (other relationship properties)
            }
        }

    HTTP Status Codes:
        200: Relationship created successfully
        400: Invalid request (e.g., missing entities, invalid data, duplicate relationship)
        500: Internal server error

    Example Request:
        POST /graph/relation/create
        {
            "source_entity": "Elon Musk",
            "target_entity": "Tesla",
            "relation_data": {
                "description": "Elon Musk is the CEO of Tesla",
                "keywords": "CEO, founder",
                "weight": 1.0
            }
        }
    """
    try:
        # Delegate to acreate_relation, which takes care of:
        # - Graph lock for concurrency
        # - Entity existence validation
        # - Duplicate relation checks
        # - Vector embedding creation in relationships_vdb
        # - Index consistency via _edit_relation_done
        created = await rag.acreate_relation(
            source_entity=request.source_entity,
            target_entity=request.target_entity,
            relation_data=request.relation_data,
        )
        payload = {
            "status": "success",
            "message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'",
            "data": created,
        }
    except ValueError as ve:
        # Validation problems are the caller's fault -> 400
        logger.error(
            f"Validation error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(ve)}"
        )
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.error(
            f"Error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(e)}"
        )
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Error creating relation: {str(e)}"
        )
    return payload
Maintains relationship properties and metadata Use Cases: - Fixing spelling errors in entity names (e.g., "Elon Msk" -> "Elon Musk") - Consolidating duplicate entities discovered after document processing - Merging name variations (e.g., "NY", "New York", "New York City") - Cleaning up the knowledge graph for better query performance - Standardizing entity names across the knowledge base Request Body: entities_to_change (list[str]): List of entity names to be merged and deleted entity_to_change_into (str): Target entity that will receive all relationships Response Schema: { "status": "success", "message": "Successfully merged 2 entities into 'Elon Musk'", "data": { "merged_entity": "Elon Musk", "deleted_entities": ["Elon Msk", "Ellon Musk"], "relationships_transferred": 15, ... (merge operation details) } } HTTP Status Codes: 200: Entities merged successfully 400: Invalid request (e.g., empty entity list, target entity doesn't exist) 500: Internal server error Example Request: POST /graph/entities/merge { "entities_to_change": ["Elon Msk", "Ellon Musk"], "entity_to_change_into": "Elon Musk" } Note: - The target entity (entity_to_change_into) must exist in the knowledge graph - Source entities will be permanently deleted after the merge - This operation cannot be undone, so verify entity names before merging """ try: result = await rag.amerge_entities( source_entities=request.entities_to_change, target_entity=request.entity_to_change_into, ) return { "status": "success", "message": f"Successfully merged {len(request.entities_to_change)} entities into '{request.entity_to_change_into}'", "data": result, } except ValueError as ve: logger.error( f"Validation error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(ve)}" ) raise HTTPException(status_code=400, detail=str(ve)) except Exception as e: logger.error( f"Error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(e)}" ) 
# Query mode selected by the query prefix (bypass is not a LightRAG query mode)
class SearchMode(str, Enum):
    """Query modes that can be selected for an Ollama-emulated request.

    Each member's value is the mode name as a plain string; the class also
    derives from ``str``.
    """

    naive = "naive"
    local = "local"
    # Trailing underscore because `global` is a Python keyword; the value is still "global"
    global_ = "global"
    hybrid = "hybrid"
    mix = "mix"
    # Not a LightRAG query mode (see the comment above the class)
    bypass = "bypass"
    # NOTE(review): the "/...context" prefixes handled in parse_query_mode map to the
    # other modes with only_need_context=True; confirm where this member is used.
    context = "context"
def parse_query_mode(query: str) -> tuple[str, SearchMode, bool, Optional[str]]:
    """Parse the query prefix to determine the search mode.

    A query may start with ``/<mode>`` to pick a search mode, optionally followed
    by ``[user prompt]``. Both parts are stripped from the returned query.

    Args:
        query: Raw query text as sent by the client.

    Returns:
        Tuple of (cleaned_query, search_mode, only_need_context, user_prompt).
        When no known prefix is present the mode defaults to ``SearchMode.mix``
        and ``only_need_context`` is False. ``user_prompt`` is None unless the
        bracket form was used.

    Examples:
        - "/local[use mermaid format for diagrams] query string"
          -> ("query string", SearchMode.local, False, "use mermaid format for diagrams")
        - "/[use mermaid format for diagrams] query string"
          -> ("query string", SearchMode.mix, False, "use mermaid format for diagrams")
        - "/local query string" -> ("query string", SearchMode.local, False, None)
    """
    user_prompt = None

    # Bracket form: "/<mode>[user prompt] rest of query".
    # re.DOTALL lets the bracketed prompt and the rest of the query span several
    # lines; without it "(.*)" stopped at the first newline and everything after
    # it was silently dropped from the query.
    bracket_match = re.match(r"^/([a-z]*)\[(.*?)\](.*)", query, re.DOTALL)
    if bracket_match:
        mode_prefix, user_prompt, remaining_query = bracket_match.groups()
        remaining_query = remaining_query.strip()
        if mode_prefix:
            # Rebuild "/<mode> <query>" so the prefix table below can be reused
            query = f"/{mode_prefix} {remaining_query}".strip()
        else:
            # "/[prompt] question": there is no mode to re-attach, so do not
            # leave a stray "/ " in front of the question
            query = remaining_query

    # Prefix -> (mode, only_need_context).
    # Plain mode prefixes require a trailing space; the "...context" prefixes are
    # matched without one (kept as in the original behaviour).
    mode_map = {
        "/local ": (SearchMode.local, False),
        "/global ": (
            SearchMode.global_,
            False,
        ),  # global_ is used because 'global' is a Python keyword
        "/naive ": (SearchMode.naive, False),
        "/hybrid ": (SearchMode.hybrid, False),
        "/mix ": (SearchMode.mix, False),
        "/bypass ": (SearchMode.bypass, False),
        "/context": (
            SearchMode.mix,
            True,
        ),
        "/localcontext": (SearchMode.local, True),
        "/globalcontext": (SearchMode.global_, True),
        "/hybridcontext": (SearchMode.hybrid, True),
        "/naivecontext": (SearchMode.naive, True),
        "/mixcontext": (SearchMode.mix, True),
    }

    for prefix, (mode, only_need_context) in mode_map.items():
        if query.startswith(prefix):
            # Drop the prefix and any whitespace that follows it
            cleaned_query = query[len(prefix) :].lstrip()
            return cleaned_query, mode, only_need_context, user_prompt

    return query, SearchMode.mix, False, user_prompt
APIRouter(tags=["ollama"]) self.setup_routes() def setup_routes(self): # Create combined auth dependency for Ollama API routes combined_auth = get_combined_auth_dependency(self.api_key) @self.router.get("/version", dependencies=[Depends(combined_auth)]) async def get_version(): """Get Ollama version information""" return OllamaVersionResponse(version="0.9.3") @self.router.get("/tags", dependencies=[Depends(combined_auth)]) async def get_tags(): """Return available models acting as an Ollama server""" return OllamaTagResponse( models=[ { "name": self.ollama_server_infos.LIGHTRAG_MODEL, "model": self.ollama_server_infos.LIGHTRAG_MODEL, "modified_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "size": self.ollama_server_infos.LIGHTRAG_SIZE, "digest": self.ollama_server_infos.LIGHTRAG_DIGEST, "details": { "parent_model": "", "format": "gguf", "family": self.ollama_server_infos.LIGHTRAG_NAME, "families": [self.ollama_server_infos.LIGHTRAG_NAME], "parameter_size": "13B", "quantization_level": "Q4_0", }, } ] ) @self.router.get("/ps", dependencies=[Depends(combined_auth)]) async def get_running_models(): """List Running Models - returns currently running models""" return OllamaPsResponse( models=[ { "name": self.ollama_server_infos.LIGHTRAG_MODEL, "model": self.ollama_server_infos.LIGHTRAG_MODEL, "size": self.ollama_server_infos.LIGHTRAG_SIZE, "digest": self.ollama_server_infos.LIGHTRAG_DIGEST, "details": { "parent_model": "", "format": "gguf", "family": "llama", "families": ["llama"], "parameter_size": "7.2B", "quantization_level": "Q4_0", }, "expires_at": "2050-12-31T14:38:31.83753-07:00", "size_vram": self.ollama_server_infos.LIGHTRAG_SIZE, } ] ) @self.router.post( "/generate", dependencies=[Depends(combined_auth)], include_in_schema=True ) async def generate(raw_request: Request): """Handle generate completion requests acting as an Ollama model For compatibility purpose, the request is not processed by LightRAG, and will be handled by underlying LLM model. 
Supports both application/json and application/octet-stream Content-Types. """ try: # Parse the request body manually request = await parse_request_body(raw_request, OllamaGenerateRequest) query = request.prompt start_time = time.time_ns() prompt_tokens = estimate_tokens(query) if request.system: self.rag.llm_model_kwargs["system_prompt"] = request.system if request.stream: response = await self.rag.llm_model_func( query, stream=True, **self.rag.llm_model_kwargs ) async def stream_generator(): first_chunk_time = None last_chunk_time = time.time_ns() total_response = "" # Ensure response is an async generator if isinstance(response, str): # If it's a string, send in two parts first_chunk_time = start_time last_chunk_time = time.time_ns() total_response = response data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": response, "done": False, } yield f"{json.dumps(data, ensure_ascii=False)}\n" completion_tokens = estimate_tokens(total_response) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": "", "done": True, "done_reason": "stop", "context": [], "total_duration": total_time, "load_duration": 0, "prompt_eval_count": prompt_tokens, "prompt_eval_duration": prompt_eval_time, "eval_count": completion_tokens, "eval_duration": eval_time, } yield f"{json.dumps(data, ensure_ascii=False)}\n" else: try: async for chunk in response: if chunk: if first_chunk_time is None: first_chunk_time = time.time_ns() last_chunk_time = time.time_ns() total_response += chunk data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": chunk, "done": False, } yield f"{json.dumps(data, ensure_ascii=False)}\n" except 
(asyncio.CancelledError, Exception) as e: error_msg = str(e) if isinstance(e, asyncio.CancelledError): error_msg = "Stream was cancelled by server" else: error_msg = f"Provider error: {error_msg}" logger.error(f"Stream error: {error_msg}") # Send error message to client error_data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": f"\n\nError: {error_msg}", "error": f"\n\nError: {error_msg}", "done": False, } yield f"{json.dumps(error_data, ensure_ascii=False)}\n" # Send final message to close the stream final_data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": "", "done": True, } yield f"{json.dumps(final_data, ensure_ascii=False)}\n" return if first_chunk_time is None: first_chunk_time = start_time completion_tokens = estimate_tokens(total_response) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": "", "done": True, "done_reason": "stop", "context": [], "total_duration": total_time, "load_duration": 0, "prompt_eval_count": prompt_tokens, "prompt_eval_duration": prompt_eval_time, "eval_count": completion_tokens, "eval_duration": eval_time, } yield f"{json.dumps(data, ensure_ascii=False)}\n" return return StreamingResponse( stream_generator(), media_type="application/x-ndjson", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "Content-Type": "application/x-ndjson", "X-Accel-Buffering": "no", # Ensure proper handling of streaming responses in Nginx proxy }, ) else: first_chunk_time = time.time_ns() response_text = await self.rag.llm_model_func( query, stream=False, **self.rag.llm_model_kwargs ) last_chunk_time = time.time_ns() if not response_text: response_text = "No response 
generated" completion_tokens = estimate_tokens(str(response_text)) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time return { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "response": str(response_text), "done": True, "done_reason": "stop", "context": [], "total_duration": total_time, "load_duration": 0, "prompt_eval_count": prompt_tokens, "prompt_eval_duration": prompt_eval_time, "eval_count": completion_tokens, "eval_duration": eval_time, } except Exception as e: logger.error(f"Ollama generate error: {str(e)}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) @self.router.post( "/chat", dependencies=[Depends(combined_auth)], include_in_schema=True ) async def chat(raw_request: Request): """Process chat completion requests by acting as an Ollama model. Routes user queries through LightRAG by selecting query mode based on query prefix. Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM. Supports both application/json and application/octet-stream Content-Types. 
""" try: # Parse the request body manually request = await parse_request_body(raw_request, OllamaChatRequest) # Get all messages messages = request.messages if not messages: raise HTTPException(status_code=400, detail="No messages provided") # Validate that the last message is from a user if messages[-1].role != "user": raise HTTPException( status_code=400, detail="Last message must be from user role" ) # Get the last message as query and previous messages as history query = messages[-1].content # Convert OllamaMessage objects to dictionaries conversation_history = [ {"role": msg.role, "content": msg.content} for msg in messages[:-1] ] # Check for query prefix cleaned_query, mode, only_need_context, user_prompt = parse_query_mode( query ) start_time = time.time_ns() prompt_tokens = estimate_tokens(cleaned_query) param_dict = { "mode": mode.value, "stream": request.stream, "only_need_context": only_need_context, "conversation_history": conversation_history, "top_k": self.top_k, } # Add user_prompt to param_dict if user_prompt is not None: param_dict["user_prompt"] = user_prompt query_param = QueryParam(**param_dict) if request.stream: # Determine if the request is prefix with "/bypass" if mode == SearchMode.bypass: if request.system: self.rag.llm_model_kwargs["system_prompt"] = request.system response = await self.rag.llm_model_func( cleaned_query, stream=True, history_messages=conversation_history, **self.rag.llm_model_kwargs, ) else: response = await self.rag.aquery( cleaned_query, param=query_param ) async def stream_generator(): first_chunk_time = None last_chunk_time = time.time_ns() total_response = "" # Ensure response is an async generator if isinstance(response, str): # If it's a string, send in two parts first_chunk_time = start_time last_chunk_time = time.time_ns() total_response = response data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": 
response, "images": None, }, "done": False, } yield f"{json.dumps(data, ensure_ascii=False)}\n" completion_tokens = estimate_tokens(total_response) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": "", "images": None, }, "done_reason": "stop", "done": True, "total_duration": total_time, "load_duration": 0, "prompt_eval_count": prompt_tokens, "prompt_eval_duration": prompt_eval_time, "eval_count": completion_tokens, "eval_duration": eval_time, } yield f"{json.dumps(data, ensure_ascii=False)}\n" else: try: async for chunk in response: if chunk: if first_chunk_time is None: first_chunk_time = time.time_ns() last_chunk_time = time.time_ns() total_response += chunk data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": chunk, "images": None, }, "done": False, } yield f"{json.dumps(data, ensure_ascii=False)}\n" except (asyncio.CancelledError, Exception) as e: error_msg = str(e) if isinstance(e, asyncio.CancelledError): error_msg = "Stream was cancelled by server" else: error_msg = f"Provider error: {error_msg}" logger.error(f"Stream error: {error_msg}") # Send error message to client error_data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": f"\n\nError: {error_msg}", "images": None, }, "error": f"\n\nError: {error_msg}", "done": False, } yield f"{json.dumps(error_data, ensure_ascii=False)}\n" # Send final message to close the stream final_data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": "", 
"images": None, }, "done": True, } yield f"{json.dumps(final_data, ensure_ascii=False)}\n" return if first_chunk_time is None: first_chunk_time = start_time completion_tokens = estimate_tokens(total_response) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time data = { "model": self.ollama_server_infos.LIGHTRAG_MODEL, "created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT, "message": { "role": "assistant", "content": "", "images": None, }, "done_reason": "stop", "done": True, "total_duration": total_time, "load_duration": 0, "prompt_eval_count": prompt_tokens, "prompt_eval_duration": prompt_eval_time, "eval_count": completion_tokens, "eval_duration": eval_time, } yield f"{json.dumps(data, ensure_ascii=False)}\n" return StreamingResponse( stream_generator(), media_type="application/x-ndjson", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "Content-Type": "application/x-ndjson", "X-Accel-Buffering": "no", # Ensure proper handling of streaming responses in Nginx proxy }, ) else: first_chunk_time = time.time_ns() # Determine if the request is prefix with "/bypass" or from Open WebUI's session title and session keyword generation task match_result = re.search( r"\n\nUSER:", cleaned_query, re.MULTILINE ) if match_result or mode == SearchMode.bypass: if request.system: self.rag.llm_model_kwargs["system_prompt"] = request.system response_text = await self.rag.llm_model_func( cleaned_query, stream=False, history_messages=conversation_history, **self.rag.llm_model_kwargs, ) else: response_text = await self.rag.aquery( cleaned_query, param=query_param ) last_chunk_time = time.time_ns() if not response_text: response_text = "No response generated" completion_tokens = estimate_tokens(str(response_text)) total_time = last_chunk_time - start_time prompt_eval_time = first_chunk_time - start_time eval_time = last_chunk_time - first_chunk_time return { "model": 
# Minimum number of characters a query must contain once surrounding
# whitespace has been removed. Shared by the field constraint and the
# post-strip validator so the two checks cannot drift apart.
_QUERY_MIN_LENGTH = 3


# Request body accepted by the /query, /query/stream and /query/data endpoints.
# Every optional field defaults to None (or an empty list) so that
# `to_query_params` can drop unset values and let QueryParam apply its own
# defaults.
class QueryRequest(BaseModel):
    query: str = Field(
        min_length=_QUERY_MIN_LENGTH,
        description="The query text",
    )

    mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = Field(
        default="mix",
        description="Query mode",
    )

    only_need_context: Optional[bool] = Field(
        default=None,
        description="If True, only returns the retrieved context without generating a response.",
    )

    only_need_prompt: Optional[bool] = Field(
        default=None,
        description="If True, only returns the generated prompt without producing a response.",
    )

    response_type: Optional[str] = Field(
        min_length=1,
        default=None,
        description="Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'.",
    )

    top_k: Optional[int] = Field(
        ge=1,
        default=None,
        description="Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
    )

    chunk_top_k: Optional[int] = Field(
        ge=1,
        default=None,
        description="Number of text chunks to retrieve initially from vector search and keep after reranking.",
    )

    max_entity_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens allocated for entity context in unified token control system.",
        ge=1,
    )

    max_relation_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens allocated for relationship context in unified token control system.",
        ge=1,
    )

    max_total_tokens: Optional[int] = Field(
        default=None,
        description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
        ge=1,
    )

    hl_keywords: list[str] = Field(
        default_factory=list,
        description="List of high-level keywords to prioritize in retrieval. Leave empty to use the LLM to generate the keywords.",
    )

    ll_keywords: list[str] = Field(
        default_factory=list,
        description="List of low-level keywords to refine retrieval focus. Leave empty to use the LLM to generate the keywords.",
    )

    conversation_history: Optional[List[Dict[str, Any]]] = Field(
        default=None,
        description="History messages are only sent to LLM for context, not used for retrieval. Format: [{'role': 'user/assistant', 'content': 'message'}].",
    )

    user_prompt: Optional[str] = Field(
        default=None,
        description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.",
    )

    enable_rerank: Optional[bool] = Field(
        default=None,
        description="Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.",
    )

    include_references: Optional[bool] = Field(
        default=True,
        description="If True, includes reference list in responses. Affects /query and /query/stream endpoints. /query/data always includes references.",
    )

    include_chunk_content: Optional[bool] = Field(
        default=False,
        description="If True, includes actual chunk text content in references. Only applies when include_references=True. Useful for evaluation and debugging.",
    )

    stream: Optional[bool] = Field(
        default=True,
        description="If True, enables streaming output for real-time responses. Only affects /query/stream endpoint.",
    )

    @field_validator("query", mode="after")
    @classmethod
    def query_strip_after(cls, query: str) -> str:
        """Strip surrounding whitespace and re-check the minimum length.

        The ``min_length`` constraint on the field is evaluated against the raw
        input, so a whitespace-only or whitespace-padded value could pass it and
        then collapse to an empty or too-short query here. Re-validating the
        stripped text closes that gap.

        Raises:
            ValueError: If fewer than ``_QUERY_MIN_LENGTH`` characters remain
                after stripping.
        """
        stripped = query.strip()
        if len(stripped) < _QUERY_MIN_LENGTH:
            raise ValueError(
                f"Query text must be at least {_QUERY_MIN_LENGTH} characters long"
            )
        return stripped

    @field_validator("conversation_history", mode="after")
    @classmethod
    def conversation_history_role_check(
        cls, conversation_history: List[Dict[str, Any]] | None
    ) -> List[Dict[str, Any]] | None:
        """Require every history message to carry a non-blank string ``role``.

        Returns the history unchanged (``None`` stays ``None``).

        Raises:
            ValueError: If a message has no ``role`` key, or its ``role`` is not
                a non-empty string.
        """
        if conversation_history is None:
            return None
        for msg in conversation_history:
            if "role" not in msg:
                raise ValueError("Each message must have a 'role' key.")
            if not isinstance(msg["role"], str) or not msg["role"].strip():
                raise ValueError("Each message 'role' must be a non-empty string.")
        return conversation_history

    def to_query_params(self, is_stream: bool) -> "QueryParam":
        """Convert this request into a ``QueryParam`` instance.

        Args:
            is_stream: Value forced onto ``QueryParam.stream``; it overrides
                whatever ``stream`` value the request body carried.

        Returns:
            A ``QueryParam`` built from every field that is not ``None``,
            excluding the API-only fields ``query`` and
            ``include_chunk_content``.
        """
        # Dropping None values lets QueryParam fall back to its own defaults;
        # `query` and `include_chunk_content` are handled by the route itself
        # and are not passed on to QueryParam.
        request_data = self.model_dump(
            exclude_none=True, exclude={"query", "include_chunk_content"}
        )

        param = QueryParam(**request_data)
        # The endpoint, not the request body, decides whether to stream.
        param.stream = is_stream
        return param


class ReferenceItem(BaseModel):
    """A single reference item in query responses."""

    reference_id: str = Field(description="Unique reference identifier")
    file_path: str = Field(description="Path to the source file")
    content: Optional[List[str]] = Field(
        default=None,
        description="List of chunk contents from this file (only present when include_chunk_content=True)",
    )


# Response body of the non-streaming /query endpoint.
class QueryResponse(BaseModel):
    response: str = Field(
        description="The generated response",
    )
    references: Optional[List[ReferenceItem]] = Field(
        default=None,
        description="Reference list (Disabled when include_references=False, /query/data always includes references.)",
    )


# Response body of the /query/data endpoint.
class QueryDataResponse(BaseModel):
    status: str = Field(description="Query execution status")
    message: str = Field(description="Status message")
    data: Dict[str, Any] = Field(
        description="Query result data containing entities, relationships, chunks, and references"
    )
    metadata: Dict[str, Any] = Field(
        description="Query metadata including mode, keywords, and processing information"
    )


class StreamChunkResponse(BaseModel):
    """Response model for streaming chunks in NDJSON format"""

    # Values are typed as Any because a reference may carry a list-valued
    # "content" entry when include_chunk_content=True.
    references: Optional[List[Dict[str, Any]]] = Field(
        default=None,
        description="Reference list (only in first chunk when include_references=True)",
    )
    response: Optional[str] = Field(
        default=None, description="Response content chunk or complete response"
    )
    error: Optional[str] = Field(
        default=None, description="Error message if processing fails"
    )
"description": "Example response when include_references=True", "value": { "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.", "references": [ { "reference_id": "1", "file_path": "/documents/ai_overview.pdf", }, { "reference_id": "2", "file_path": "/documents/machine_learning.txt", }, ], }, }, "with_chunk_content": { "summary": "Response with chunk content", "description": "Example response when include_references=True and include_chunk_content=True. Note: content is an array of chunks from the same file.", "value": { "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.", "references": [ { "reference_id": "1", "file_path": "/documents/ai_overview.pdf", "content": [ "Artificial Intelligence (AI) represents a transformative field in computer science focused on creating systems that can perform tasks requiring human-like intelligence. These tasks include learning from experience, understanding natural language, recognizing patterns, and making decisions.", "AI systems can be categorized into narrow AI, which is designed for specific tasks, and general AI, which aims to match human cognitive abilities across a wide range of domains.", ], }, { "reference_id": "2", "file_path": "/documents/machine_learning.txt", "content": [ "Machine learning is a subset of AI that enables computers to learn and improve from experience without being explicitly programmed. It focuses on the development of algorithms that can access data and use it to learn for themselves." 
async def query_text(request: QueryRequest):
    """
    Comprehensive RAG query endpoint with non-streaming response. Parameter "stream" is ignored.

    This endpoint performs Retrieval-Augmented Generation (RAG) queries using various modes
    to provide intelligent responses based on your knowledge base.

    **Query Modes:**
    - **local**: Focuses on specific entities and their direct relationships
    - **global**: Analyzes broader patterns and relationships across the knowledge graph
    - **hybrid**: Combines local and global approaches for comprehensive results
    - **naive**: Simple vector similarity search without knowledge graph
    - **mix**: Integrates knowledge graph retrieval with vector search (recommended)
    - **bypass**: Direct LLM query without knowledge retrieval

    The conversation_history parameter is sent to the LLM only and does not affect retrieval results.

    **Usage Examples:**

    Basic query:
    ```json
    {
        "query": "What is machine learning?",
        "mode": "mix"
    }
    ```

    Bypass initial LLM call by providing high-level and low-level keywords:
    ```json
    {
        "query": "What is Retrieval-Augmented-Generation?",
        "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
        "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
        "mode": "mix"
    }
    ```

    Advanced query with references:
    ```json
    {
        "query": "Explain neural networks",
        "mode": "hybrid",
        "include_references": true,
        "response_type": "Multiple Paragraphs",
        "top_k": 10
    }
    ```

    Conversation with history:
    ```json
    {
        "query": "Can you give me more details?",
        "conversation_history": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI is artificial intelligence..."}
        ]
    }
    ```

    Args:
        request (QueryRequest): The request object containing query parameters:
            - **query**: The question or prompt to process (min 3 characters)
            - **mode**: Query strategy - "mix" recommended for best results
            - **include_references**: Whether to include source citations
            - **response_type**: Format preference (e.g., "Multiple Paragraphs")
            - **top_k**: Number of top entities/relations to retrieve
            - **conversation_history**: Previous dialogue context
            - **max_total_tokens**: Token budget for the entire response

    Returns:
        QueryResponse: JSON response containing:
            - **response**: The generated answer to your query
            - **references**: Source citations (if include_references=True)

    Raises:
        HTTPException:
            - 400: Invalid input parameters (e.g., query too short)
            - 500: Internal processing error (e.g., LLM service unavailable)
    """

    def attach_chunk_content(references, chunks):
        """Return copies of ``references`` with matching chunk texts under ``content``."""
        # Group chunk texts by reference_id; chunks with an empty id or empty
        # content are ignored. One file may contribute several chunks, so the
        # texts are kept as a list.
        content_by_ref_id = {}
        for chunk in chunks:
            ref_id = chunk.get("reference_id", "")
            content = chunk.get("content", "")
            if ref_id and content:
                content_by_ref_id.setdefault(ref_id, []).append(content)

        enriched = []
        for ref in references:
            # Copy so the reference dicts inside the query result stay untouched.
            ref_copy = ref.copy()
            ref_id = ref.get("reference_id", "")
            if ref_id in content_by_ref_id:
                ref_copy["content"] = content_by_ref_id[ref_id]
            enriched.append(ref_copy)
        return enriched

    try:
        # to_query_params(False) already forces stream=False, which is what
        # this non-streaming endpoint requires regardless of the request body.
        param = request.to_query_params(False)

        # Unified approach: aquery_llm returns both the LLM answer and the
        # retrieval data (references, chunks) in one result.
        result = await rag.aquery_llm(request.query, param=param)

        llm_response = result.get("llm_response", {})
        data = result.get("data", {})

        response_content = llm_response.get("content", "")
        if not response_content:
            response_content = "No relevant context found for the query."

        if not request.include_references:
            return QueryResponse(response=response_content, references=None)

        references = data.get("references", [])
        if request.include_chunk_content:
            references = attach_chunk_content(references, data.get("chunks", []))

        return QueryResponse(response=response_content, references=references)
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
First line contains references with content arrays (one file may have multiple chunks), subsequent lines contain response chunks.", "value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai_overview.pdf", "content": ["Artificial Intelligence (AI) represents a transformative field...", "AI systems can be categorized into narrow AI and general AI..."]}, {"reference_id": "2", "file_path": "/documents/ml_basics.txt", "content": ["Machine learning is a subset of AI that enables computers to learn..."]}]}\n{"response": "Artificial Intelligence (AI) is a branch of computer science"}\n{"response": " that aims to create intelligent machines capable of performing"}\n{"response": " tasks that typically require human intelligence."}', }, "streaming_without_references": { "summary": "Streaming mode without references (stream=true)", "description": "Multiple NDJSON lines when stream=True and include_references=False. Only response chunks are sent.", "value": '{"response": "Machine learning is a subset of artificial intelligence"}\n{"response": " that enables computers to learn and improve from experience"}\n{"response": " without being explicitly programmed for every task."}', }, "non_streaming_with_references": { "summary": "Non-streaming mode with references (stream=false)", "description": "Single NDJSON line when stream=False and include_references=True. Complete response with references in one message.", "value": '{"references": [{"reference_id": "1", "file_path": "/documents/neural_networks.pdf"}], "response": "Neural networks are computational models inspired by biological neural networks that consist of interconnected nodes (neurons) organized in layers. They are fundamental to deep learning and can learn complex patterns from data through training processes."}', }, "non_streaming_without_references": { "summary": "Non-streaming mode without references (stream=false)", "description": "Single NDJSON line when stream=False and include_references=False. 
async def query_text_stream(request: QueryRequest):
    """
    Advanced RAG query endpoint with flexible streaming response.

    This endpoint provides the most flexible querying experience, supporting both real-time streaming
    and complete response delivery based on your integration needs.

    **Response Modes:**
    - Real-time response delivery as content is generated
    - NDJSON format: each line is a separate JSON object
    - First line: `{"references": [...]}` (if include_references=True)
    - Subsequent lines: `{"response": "content chunk"}`
    - Error handling: `{"error": "error message"}`

    > If stream parameter is False, or the query hit LLM cache, complete response delivered in a single streaming message.

    **Response Format Details**
    - **Content-Type**: `application/x-ndjson` (Newline-Delimited JSON)
    - **Structure**: Each line is an independent, valid JSON object
    - **Parsing**: Process line-by-line, each line is self-contained
    - **Headers**: Includes cache control and connection management

    **Query Modes (same as /query endpoint)**
    - **local**: Entity-focused retrieval with direct relationships
    - **global**: Pattern analysis across the knowledge graph
    - **hybrid**: Combined local and global strategies
    - **naive**: Vector similarity search only
    - **mix**: Integrated knowledge graph + vector retrieval (recommended)
    - **bypass**: Direct LLM query without knowledge retrieval

    The conversation_history parameter is sent to the LLM only and does not affect retrieval results.

    **Usage Examples**

    Real-time streaming query:
    ```json
    {
        "query": "Explain machine learning algorithms",
        "mode": "mix",
        "stream": true,
        "include_references": true
    }
    ```

    Bypass initial LLM call by providing high-level and low-level keywords:
    ```json
    {
        "query": "What is Retrieval-Augmented-Generation?",
        "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
        "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
        "mode": "mix"
    }
    ```

    Complete response query:
    ```json
    {
        "query": "What is deep learning?",
        "mode": "hybrid",
        "stream": false,
        "response_type": "Multiple Paragraphs"
    }
    ```

    Conversation with context:
    ```json
    {
        "query": "Can you elaborate on that?",
        "stream": true,
        "conversation_history": [
            {"role": "user", "content": "What is neural network?"},
            {"role": "assistant", "content": "A neural network is..."}
        ]
    }
    ```

    **Response Processing:**

    ```python
    async for line in response.iter_lines():
        data = json.loads(line)

        if "references" in data:
            # Handle references (first message)
            references = data["references"]
        if "response" in data:
            # Handle content chunk
            content_chunk = data["response"]
        if "error" in data:
            # Handle error
            error_message = data["error"]
    ```

    **Error Handling:**
    - Streaming errors are delivered as `{"error": "message"}` lines
    - Non-streaming errors raise HTTP exceptions
    - Partial responses may be delivered before errors in streaming mode
    - Always check for error objects when processing streaming responses

    Args:
        request (QueryRequest): The request object containing query parameters:
            - **query**: The question or prompt to process (min 3 characters)
            - **mode**: Query strategy - "mix" recommended for best results
            - **stream**: Enable streaming (True) or complete response (False)
            - **include_references**: Whether to include source citations
            - **response_type**: Format preference (e.g., "Multiple Paragraphs")
            - **top_k**: Number of top entities/relations to retrieve
            - **conversation_history**: Previous dialogue context for multi-turn conversations
            - **max_total_tokens**: Token budget for the entire response

    Returns:
        StreamingResponse: NDJSON streaming response containing:
            - **Streaming mode**: Multiple JSON objects, one per line
                - References object (if requested): `{"references": [...]}`
                - Content chunks: `{"response": "chunk content"}`
                - Error objects: `{"error": "error message"}`
            - **Non-streaming mode**: Single JSON object
                - Complete response: `{"references": [...], "response": "complete content"}`

    Raises:
        HTTPException:
            - 400: Invalid input parameters (e.g., query too short, invalid mode)
            - 500: Internal processing error (e.g., LLM service unavailable)

    Note:
        This endpoint is ideal for applications requiring flexible response delivery.
        Use streaming mode for real-time interfaces and non-streaming for batch processing.
    """

    def attach_chunk_content(references, chunks):
        """Return copies of ``references`` with matching chunk texts under ``content``."""
        # Group chunk texts by reference_id; chunks with an empty id or empty
        # content are ignored. One file may contribute several chunks, so the
        # texts are kept as a list.
        content_by_ref_id = {}
        for chunk in chunks:
            ref_id = chunk.get("reference_id", "")
            content = chunk.get("content", "")
            if ref_id and content:
                content_by_ref_id.setdefault(ref_id, []).append(content)

        enriched = []
        for ref in references:
            # Copy so the reference dicts inside the query result stay untouched.
            ref_copy = ref.copy()
            ref_id = ref.get("reference_id", "")
            if ref_id in content_by_ref_id:
                ref_copy["content"] = content_by_ref_id[ref_id]
            enriched.append(ref_copy)
        return enriched

    try:
        # Use the stream parameter from the request; an explicit null falls
        # back to streaming.
        stream_mode = request.stream if request.stream is not None else True
        param = request.to_query_params(stream_mode)

        from fastapi.responses import StreamingResponse

        # Unified approach: aquery_llm is used for both streaming and
        # non-streaming delivery; the result says which one it produced.
        result = await rag.aquery_llm(request.query, param=param)

        async def stream_generator():
            data = result.get("data", {})
            references = data.get("references", [])
            llm_response = result.get("llm_response", {})

            if request.include_references and request.include_chunk_content:
                references = attach_chunk_content(references, data.get("chunks", []))

            if llm_response.get("is_streaming"):
                # Streaming mode: references go out first, then the chunks.
                if request.include_references:
                    yield f"{json.dumps({'references': references})}\n"

                response_stream = llm_response.get("response_iterator")
                if response_stream:
                    try:
                        async for chunk in response_stream:
                            if chunk:  # Only send non-empty content
                                yield f"{json.dumps({'response': chunk})}\n"
                    except Exception as e:
                        # The response has already started, so the failure is
                        # reported in-band as an NDJSON error line.
                        logger.error(f"Streaming error: {str(e)}", exc_info=True)
                        yield f"{json.dumps({'error': str(e)})}\n"
            else:
                # Non-streaming mode: the whole answer is sent as one line.
                response_content = llm_response.get("content", "")
                if not response_content:
                    response_content = "No relevant context found for the query."

                complete_response = {"response": response_content}
                if request.include_references:
                    complete_response["references"] = references

                yield f"{json.dumps(complete_response)}\n"

        return StreamingResponse(
            stream_generator(),
            media_type="application/x-ndjson",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Content-Type": "application/x-ndjson",
                "X-Accel-Buffering": "no",  # Ensure proper handling of streaming response when proxied by Nginx
            },
        )
    except Exception as e:
        logger.error(f"Error processing streaming query: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
graph", }, "chunks": { "type": "array", "items": { "type": "object", "properties": { "content": {"type": "string"}, "file_path": {"type": "string"}, "chunk_id": {"type": "string"}, "reference_id": {"type": "string"}, }, }, "description": "Retrieved text chunks from vector database", }, "references": { "type": "array", "items": { "type": "object", "properties": { "reference_id": {"type": "string"}, "file_path": {"type": "string"}, }, }, "description": "Reference list for citation purposes", }, }, "description": "Structured retrieval data containing entities, relationships, chunks, and references", }, "metadata": { "type": "object", "properties": { "query_mode": {"type": "string"}, "keywords": { "type": "object", "properties": { "high_level": { "type": "array", "items": {"type": "string"}, }, "low_level": { "type": "array", "items": {"type": "string"}, }, }, }, "processing_info": { "type": "object", "properties": { "total_entities_found": { "type": "integer" }, "total_relations_found": { "type": "integer" }, "entities_after_truncation": { "type": "integer" }, "relations_after_truncation": { "type": "integer" }, "final_chunks_count": { "type": "integer" }, }, }, }, "description": "Query metadata including mode, keywords, and processing information", }, }, "required": ["status", "message", "data", "metadata"], }, "examples": { "successful_local_mode": { "summary": "Local mode data retrieval", "description": "Example of structured data from local mode query focusing on specific entities", "value": { "status": "success", "message": "Query executed successfully", "data": { "entities": [ { "entity_name": "Neural Networks", "entity_type": "CONCEPT", "description": "Computational models inspired by biological neural networks", "source_id": "chunk-123", "file_path": "/documents/ai_basics.pdf", "reference_id": "1", } ], "relationships": [ { "src_id": "Neural Networks", "tgt_id": "Machine Learning", "description": "Neural networks are a subset of machine learning algorithms", 
"keywords": "subset, algorithm, learning", "weight": 0.85, "source_id": "chunk-123", "file_path": "/documents/ai_basics.pdf", "reference_id": "1", } ], "chunks": [ { "content": "Neural networks are computational models that mimic the way biological neural networks work...", "file_path": "/documents/ai_basics.pdf", "chunk_id": "chunk-123", "reference_id": "1", } ], "references": [ { "reference_id": "1", "file_path": "/documents/ai_basics.pdf", } ], }, "metadata": { "query_mode": "local", "keywords": { "high_level": ["neural", "networks"], "low_level": [ "computation", "model", "algorithm", ], }, "processing_info": { "total_entities_found": 5, "total_relations_found": 3, "entities_after_truncation": 1, "relations_after_truncation": 1, "final_chunks_count": 1, }, }, }, }, "global_mode": { "summary": "Global mode data retrieval", "description": "Example of structured data from global mode query analyzing broader patterns", "value": { "status": "success", "message": "Query executed successfully", "data": { "entities": [], "relationships": [ { "src_id": "Artificial Intelligence", "tgt_id": "Machine Learning", "description": "AI encompasses machine learning as a core component", "keywords": "encompasses, component, field", "weight": 0.92, "source_id": "chunk-456", "file_path": "/documents/ai_overview.pdf", "reference_id": "2", } ], "chunks": [], "references": [ { "reference_id": "2", "file_path": "/documents/ai_overview.pdf", } ], }, "metadata": { "query_mode": "global", "keywords": { "high_level": [ "artificial", "intelligence", "overview", ], "low_level": [], }, }, }, }, "naive_mode": { "summary": "Naive mode data retrieval", "description": "Example of structured data from naive mode using only vector search", "value": { "status": "success", "message": "Query executed successfully", "data": { "entities": [], "relationships": [], "chunks": [ { "content": "Deep learning is a subset of machine learning that uses neural networks with multiple layers...", "file_path": 
"/documents/deep_learning.pdf", "chunk_id": "chunk-789", "reference_id": "3", } ], "references": [ { "reference_id": "3", "file_path": "/documents/deep_learning.pdf", } ], }, "metadata": { "query_mode": "naive", "keywords": {"high_level": [], "low_level": []}, }, }, }, }, } }, }, 400: { "description": "Bad Request - Invalid input parameters", "content": { "application/json": { "schema": { "type": "object", "properties": {"detail": {"type": "string"}}, }, "example": { "detail": "Query text must be at least 3 characters long" }, } }, }, 500: { "description": "Internal Server Error - Data retrieval failed", "content": { "application/json": { "schema": { "type": "object", "properties": {"detail": {"type": "string"}}, }, "example": { "detail": "Failed to retrieve data: Knowledge graph unavailable" }, } }, }, }, ) async def query_data(request: QueryRequest): """ Advanced data retrieval endpoint for structured RAG analysis. This endpoint provides raw retrieval results without LLM generation, perfect for: - **Data Analysis**: Examine what information would be used for RAG - **System Integration**: Get structured data for custom processing - **Debugging**: Understand retrieval behavior and quality - **Research**: Analyze knowledge graph structure and relationships **Key Features:** - No LLM generation - pure data retrieval - Complete structured output with entities, relationships, and chunks - Always includes references for citation - Detailed metadata about processing and keywords - Compatible with all query modes and parameters **Query Mode Behaviors:** - **local**: Returns entities and their direct relationships + related chunks - **global**: Returns relationship patterns across the knowledge graph - **hybrid**: Combines local and global retrieval strategies - **naive**: Returns only vector-retrieved text chunks (no knowledge graph) - **mix**: Integrates knowledge graph data with vector-retrieved chunks - **bypass**: Returns empty data arrays (used for direct LLM 
queries) **Data Structure:** - **entities**: Knowledge graph entities with descriptions and metadata - **relationships**: Connections between entities with weights and descriptions - **chunks**: Text segments from documents with source information - **references**: Citation information mapping reference IDs to file paths - **metadata**: Processing information, keywords, and query statistics **Usage Examples:** Analyze entity relationships: ```json { "query": "machine learning algorithms", "mode": "local", "top_k": 10 } ``` Explore global patterns: ```json { "query": "artificial intelligence trends", "mode": "global", "max_relation_tokens": 2000 } ``` Vector similarity search: ```json { "query": "neural network architectures", "mode": "naive", "chunk_top_k": 5 } ``` Bypass initial LLM call by providing high-level and low-level keywords: ```json { "query": "What is Retrieval-Augmented-Generation?", "hl_keywords": ["machine learning", "information retrieval", "natural language processing"], "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"], "mode": "mix" } ``` **Response Analysis:** - **Empty arrays**: Normal for certain modes (e.g., naive mode has no entities/relationships) - **Processing info**: Shows retrieval statistics and token usage - **Keywords**: High-level and low-level keywords extracted from query - **Reference mapping**: Links all data back to source documents Args: request (QueryRequest): The request object containing query parameters: - **query**: The search query to analyze (min 3 characters) - **mode**: Retrieval strategy affecting data types returned - **top_k**: Number of top entities/relationships to retrieve - **chunk_top_k**: Number of text chunks to retrieve - **max_entity_tokens**: Token limit for entity context - **max_relation_tokens**: Token limit for relationship context - **max_total_tokens**: Overall token budget for retrieval Returns: QueryDataResponse: Structured JSON response containing: - **status**: "success" 
def check_and_install_dependencies():
    """Ensure the packages needed by the Gunicorn launcher are installed.

    Every required package is checked through ``pipmaster``; a package that
    is not installed yet is installed immediately, and progress is reported
    on stdout before and after the installation.
    """
    # Extend this tuple when the launcher needs additional packages.
    needed = ("gunicorn", "tiktoken", "psutil")

    for name in needed:
        # Already available: nothing to do for this package.
        if pm.is_installed(name):
            continue
        print(f"Installing {name}...")
        pm.install(name)
        print(f"{name} installed successfully")
The Objective-C runtime has fork safety checks") print(" that will crash worker processes when embedding functions are called.") print("\nCurrent configuration:") print(" - Operating System: macOS (Darwin)") print(f" - Workers: {global_args.workers}") print( f" - Environment Variable: {os.environ.get('OBJC_DISABLE_INITIALIZE_FORK_SAFETY', 'NOT SET')}" ) print("\nHow to fix:") print(" Option 1 - Set environment variable before starting (recommended):") print(" export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES") print(" lightrag-gunicorn --workers 2") print("\n Option 2 - Add to your shell profile (~/.zshrc or ~/.bash_profile):") print(" echo 'export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES' >> ~/.zshrc") print(" source ~/.zshrc") print("\n Option 3 - Use single worker mode (no multiprocessing):") print(" lightrag-server --workers 1") print("=" * 80 + "\n") sys.exit(1) # Check and install dependencies check_and_install_dependencies() # Note: Signal handlers are NOT registered here because: # - Master cleanup already handled by gunicorn_config.on_exit() # Display startup information display_splash_screen(global_args) print("🚀 Starting LightRAG with Gunicorn") print(f"🔄 Worker management: Gunicorn (workers={global_args.workers})") print("🔍 Preloading app: Enabled") print("📝 Note: Using Gunicorn's preload feature for shared data initialization") print("\n\n" + "=" * 80) print("MAIN PROCESS INITIALIZATION") print(f"Process ID: {os.getpid()}") print(f"Workers setting: {global_args.workers}") print("=" * 80 + "\n") # Import Gunicorn's StandaloneApplication from gunicorn.app.base import BaseApplication # Define a custom application class that loads our config class GunicornApp(BaseApplication): def __init__(self, app, options=None): self.options = options or {} self.application = app super().__init__() def load_config(self): # Define valid Gunicorn configuration options valid_options = { "bind", "workers", "worker_class", "timeout", "keepalive", "preload_app", "errorlog", 
"accesslog", "loglevel", "certfile", "keyfile", "limit_request_line", "limit_request_fields", "limit_request_field_size", "graceful_timeout", "max_requests", "max_requests_jitter", } # Special hooks that need to be set separately special_hooks = { "on_starting", "on_reload", "on_exit", "pre_fork", "post_fork", "pre_exec", "pre_request", "post_request", "worker_init", "worker_exit", "nworkers_changed", "child_exit", } # Import and configure the gunicorn_config module from lightrag.api import gunicorn_config # Set configuration variables in gunicorn_config, prioritizing command line arguments gunicorn_config.workers = ( global_args.workers if global_args.workers else get_env_value("WORKERS", DEFAULT_WOKERS, int) ) # Bind configuration prioritizes command line arguments host = ( global_args.host if global_args.host != "0.0.0.0" else os.getenv("HOST", "0.0.0.0") ) port = ( global_args.port if global_args.port != 9621 else get_env_value("PORT", 9621, int) ) gunicorn_config.bind = f"{host}:{port}" # Log level configuration prioritizes command line arguments gunicorn_config.loglevel = ( global_args.log_level.lower() if global_args.log_level else os.getenv("LOG_LEVEL", "info") ) # Timeout configuration prioritizes command line arguments gunicorn_config.timeout = ( global_args.timeout + 30 if global_args.timeout is not None else get_env_value( "TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True ) ) # Keepalive configuration gunicorn_config.keepalive = get_env_value("KEEPALIVE", 5, int) # SSL configuration prioritizes command line arguments if global_args.ssl or os.getenv("SSL", "").lower() in ( "true", "1", "yes", "t", "on", ): gunicorn_config.certfile = ( global_args.ssl_certfile if global_args.ssl_certfile else os.getenv("SSL_CERTFILE") ) gunicorn_config.keyfile = ( global_args.ssl_keyfile if global_args.ssl_keyfile else os.getenv("SSL_KEYFILE") ) # Set configuration options from the module for key in dir(gunicorn_config): if key in valid_options: value = 
# Values of LIGHTRAG_RUNTIME_TARGET that require the server to run in a container.
_CONTAINER_RUNTIME_TARGETS = {"compose", "docker"}


@dataclass(frozen=True)
class RuntimeEnvironment:
    """Describes whether the current process is running in a container runtime."""

    # True when either of the two flags below is set.
    in_container: bool
    # True when Docker/Podman-style markers were found.
    in_docker: bool
    # True when Kubernetes markers were found.
    in_kubernetes: bool

    @property
    def label(self) -> str:
        """Human-readable runtime name; Kubernetes takes precedence over Docker."""
        if self.in_kubernetes:
            return "Kubernetes"
        if self.in_docker:
            return "Docker"
        return "host"


def _read_cgroup_content() -> str:
    """Best-effort read of cgroup metadata for container detection.

    Returns:
        The text of the first cgroup file that can be read, or an empty
        string when none of the candidates is readable.
    """
    for candidate in ("/proc/1/cgroup", "/proc/self/cgroup"):
        try:
            return Path(candidate).read_text(encoding="utf-8")
        except OSError:
            # Missing or unreadable file: try the next candidate.
            continue
    return ""


def detect_runtime_environment(
    environ: dict[str, str] | None = None,
) -> RuntimeEnvironment:
    """Detect whether the current process is running on host, Docker, or Kubernetes.

    Args:
        environ: Environment mapping to inspect. ``None`` (the default) means
            "use ``os.environ``". Any mapping that is passed explicitly is
            used as-is, including an empty one, so callers can run the
            detection isolated from the real process environment.

    Returns:
        A ``RuntimeEnvironment`` built from the environment mapping, well-known
        marker files, and the cgroup metadata.
    """
    # Compare against None instead of relying on truthiness: an empty mapping
    # is falsy and would otherwise be silently replaced by os.environ.
    if environ is None:
        environ = os.environ
    cgroup_content = _read_cgroup_content().lower()

    in_kubernetes = bool(
        environ.get("KUBERNETES_SERVICE_HOST")
        or Path("/var/run/secrets/kubernetes.io/serviceaccount").exists()
        or "kubepods" in cgroup_content
        or "kubernetes" in cgroup_content
    )
    in_docker = bool(
        Path("/.dockerenv").exists()
        or Path("/run/.containerenv").exists()
        or any(
            marker in cgroup_content
            for marker in ("docker", "containerd", "libpod", "podman")
        )
    )

    return RuntimeEnvironment(
        in_container=in_kubernetes or in_docker,
        in_docker=in_docker,
        in_kubernetes=in_kubernetes,
    )


def load_runtime_target_from_env_file(env_path: str | Path = ".env") -> str | None:
    """Return the raw LIGHTRAG_RUNTIME_TARGET value from the `.env` file, if present.

    Args:
        env_path: Path of the dotenv file to read.

    Returns:
        The value with surrounding whitespace stripped, or ``None`` when the
        lookup yields ``None``.
    """
    env_values = dotenv_values(str(env_path))
    runtime_target = env_values.get("LIGHTRAG_RUNTIME_TARGET")
    if runtime_target is None:
        return None
    return runtime_target.strip()


def validate_runtime_target(
    runtime_target: str | None,
    runtime_environment: RuntimeEnvironment | None = None,
) -> tuple[bool, str | None]:
    """Validate `.env` runtime target against the current runtime environment.

    Args:
        runtime_target: Declared target; compared case-insensitively after
            stripping whitespace. ``None`` means nothing was declared.
        runtime_environment: Environment to validate against; detected via
            ``detect_runtime_environment()`` when omitted.

    Returns:
        ``(True, None)`` when the target is undeclared or matches the
        environment, otherwise ``(False, message)`` with an explanation.
    """
    if runtime_target is None:
        return True, None

    normalized_target = runtime_target.strip().lower()
    runtime_environment = runtime_environment or detect_runtime_environment()

    if normalized_target == "host":
        if runtime_environment.in_container:
            return (
                False,
                "Configuration error in .env: LIGHTRAG_RUNTIME_TARGET=host.\n"
                "This value from .env requires the server process to run on the host, "
                f"but the current process is running inside {runtime_environment.label}.",
            )
        return True, None

    if normalized_target in _CONTAINER_RUNTIME_TARGETS:
        if runtime_environment.in_container:
            return True, None
        return (
            False,
            f"Configuration error in .env: LIGHTRAG_RUNTIME_TARGET={runtime_target}.\n"
            "This value from .env requires the server process to run inside Docker or "
            "Kubernetes, but the current process is running on the host.",
        )

    # Anything else is not a recognised target name.
    return (
        False,
        f"Configuration error in .env: LIGHTRAG_RUNTIME_TARGET={runtime_target!r}.\n"
        "This value from .env must be 'host' or 'compose' (alias: 'docker').",
    )


def validate_runtime_target_from_env_file(
    env_path: str | Path = ".env",
    runtime_environment: RuntimeEnvironment | None = None,
) -> tuple[bool, str | None]:
    """Load LIGHTRAG_RUNTIME_TARGET from `.env` and validate it if declared.

    Args:
        env_path: Path of the dotenv file to read.
        runtime_environment: Optional pre-detected environment, passed through
            to ``validate_runtime_target``.

    Returns:
        The ``(is_valid, message)`` pair produced by ``validate_runtime_target``.
    """
    runtime_target = load_runtime_target_from_env_file(env_path)
    return validate_runtime_target(runtime_target, runtime_environment)
w,x,C,j=8*_-u-1,L=(1<>1,$=23===u?Math.pow(2,-24)-Math.pow(2,-77):0,U=a?0:_-1,V=a?1:-1,z=o<0||0===o&&1/o<0?1:0;for(o=Math.abs(o),isNaN(o)||o===1/0?(x=isNaN(o)?1:0,w=L):(w=Math.floor(Math.log(o)/Math.LN2),o*(C=Math.pow(2,-w))<1&&(w--,C*=2),(o+=w+B>=1?$/C:$*Math.pow(2,1-B))*C>=2&&(w++,C/=2),w+B>=L?(x=0,w=L):w+B>=1?(x=(o*C-1)*Math.pow(2,u),w+=B):(x=o*Math.pow(2,B-1)*Math.pow(2,u),w=0));u>=8;s[i+U]=255&x,U+=V,x/=256,u-=8);for(w=w<0;s[i+U]=255&w,U+=V,w/=256,j-=8);s[i+U-V]|=128*z}},462:(s,o,i)=>{"use strict";var a=i(40975);s.exports=a},659:(s,o,i)=>{var a=i(51873),u=Object.prototype,_=u.hasOwnProperty,w=u.toString,x=a?a.toStringTag:void 0;s.exports=function getRawTag(s){var o=_.call(s,x),i=s[x];try{s[x]=void 0;var a=!0}catch(s){}var u=w.call(s);return a&&(o?s[x]=i:delete s[x]),u}},694:(s,o,i)=>{"use strict";i(91599);var a=i(37257);i(12560),s.exports=a},953:(s,o,i)=>{"use strict";s.exports=i(53375)},1733:s=>{var o=/[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g;s.exports=function asciiWords(s){return s.match(o)||[]}},1882:(s,o,i)=>{var a=i(72552),u=i(23805);s.exports=function isFunction(s){if(!u(s))return!1;var o=a(s);return"[object Function]"==o||"[object GeneratorFunction]"==o||"[object AsyncFunction]"==o||"[object Proxy]"==o}},1907:(s,o,i)=>{"use strict";var a=i(41505),u=Function.prototype,_=u.call,w=a&&u.bind.bind(_,_);s.exports=a?w:function(s){return function(){return _.apply(s,arguments)}}},2205:function(s,o,i){var a;a=void 0!==i.g?i.g:this,s.exports=function(s){if(s.CSS&&s.CSS.escape)return s.CSS.escape;var cssEscape=function(s){if(0==arguments.length)throw new TypeError("`CSS.escape` requires an argument.");for(var o,i=String(s),a=i.length,u=-1,_="",w=i.charCodeAt(0);++u=1&&o<=31||127==o||0==u&&o>=48&&o<=57||1==u&&o>=48&&o<=57&&45==w?"\\"+o.toString(16)+" ":0==u&&1==a&&45==o||!(o>=128||45==o||95==o||o>=48&&o<=57||o>=65&&o<=90||o>=97&&o<=122)?"\\"+i.charAt(u):i.charAt(u):_+="�";return _};return s.CSS||(s.CSS={}),s.CSS.escape=cssEscape,cssEscape}(a)},2209:(s,o,i)=>{"use 
strict";var a,u=i(9404),_=function productionTypeChecker(){invariant(!1,"ImmutablePropTypes type checking code is stripped in production.")};_.isRequired=_;var w=function getProductionTypeChecker(){return _};function getPropType(s){var o=typeof s;return Array.isArray(s)?"array":s instanceof RegExp?"object":s instanceof u.Iterable?"Immutable."+s.toSource().split(" ")[0]:o}function createChainableTypeChecker(s){function checkType(o,i,a,u,_,w){for(var x=arguments.length,C=Array(x>6?x-6:0),j=6;j>",null!=i[a]?s.apply(void 0,[i,a,u,_,w].concat(C)):o?new Error("Required "+_+" `"+w+"` was not specified in `"+u+"`."):void 0}var o=checkType.bind(null,!1);return o.isRequired=checkType.bind(null,!0),o}function createIterableSubclassTypeChecker(s,o){return function createImmutableTypeChecker(s,o){return createChainableTypeChecker((function validate(i,a,u,_,w){var x=i[a];if(!o(x)){var C=getPropType(x);return new Error("Invalid "+_+" `"+w+"` of type `"+C+"` supplied to `"+u+"`, expected `"+s+"`.")}return null}))}("Iterable."+s,(function(s){return u.Iterable.isIterable(s)&&o(s)}))}(a={listOf:w,mapOf:w,orderedMapOf:w,setOf:w,orderedSetOf:w,stackOf:w,iterableOf:w,recordOf:w,shape:w,contains:w,mapContains:w,orderedMapContains:w,list:_,map:_,orderedMap:_,set:_,orderedSet:_,stack:_,seq:_,record:_,iterable:_}).iterable.indexed=createIterableSubclassTypeChecker("Indexed",u.Iterable.isIndexed),a.iterable.keyed=createIterableSubclassTypeChecker("Keyed",u.Iterable.isKeyed),s.exports=a},2404:(s,o,i)=>{var a=i(60270);s.exports=function isEqual(s,o){return a(s,o)}},2523:s=>{s.exports=function baseFindIndex(s,o,i,a){for(var u=s.length,_=i+(a?1:-1);a?_--:++_{"use strict";var a=i(45951),u=Object.defineProperty;s.exports=function(s,o){try{u(a,s,{value:o,configurable:!0,writable:!0})}catch(i){a[s]=o}return o}},2694:(s,o,i)=>{"use strict";var a=i(6925);function emptyFunction(){}function emptyFunctionWithReset(){}emptyFunctionWithReset.resetWarningCache=emptyFunction,s.exports=function(){function 
shim(s,o,i,u,_,w){if(w!==a){var x=new Error("Calling PropTypes validators directly is not supported by the `prop-types` package. Use PropTypes.checkPropTypes() to call them. Read more at http://fb.me/use-check-prop-types");throw x.name="Invariant Violation",x}}function getShim(){return shim}shim.isRequired=shim;var s={array:shim,bigint:shim,bool:shim,func:shim,number:shim,object:shim,string:shim,symbol:shim,any:shim,arrayOf:getShim,element:shim,elementType:shim,instanceOf:getShim,node:shim,objectOf:getShim,oneOf:getShim,oneOfType:getShim,shape:getShim,exact:getShim,checkPropTypes:emptyFunctionWithReset,resetWarningCache:emptyFunction};return s.PropTypes=s,s}},2874:s=>{s.exports={}},2875:(s,o,i)=>{"use strict";var a=i(23045),u=i(80376);s.exports=Object.keys||function keys(s){return a(s,u)}},2955:(s,o,i)=>{"use strict";var a,u=i(65606);function _defineProperty(s,o,i){return(o=function _toPropertyKey(s){var o=function _toPrimitive(s,o){if("object"!=typeof s||null===s)return s;var i=s[Symbol.toPrimitive];if(void 0!==i){var a=i.call(s,o||"default");if("object"!=typeof a)return a;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===o?String:Number)(s)}(s,"string");return"symbol"==typeof o?o:String(o)}(o))in s?Object.defineProperty(s,o,{value:i,enumerable:!0,configurable:!0,writable:!0}):s[o]=i,s}var _=i(86238),w=Symbol("lastResolve"),x=Symbol("lastReject"),C=Symbol("error"),j=Symbol("ended"),L=Symbol("lastPromise"),B=Symbol("handlePromise"),$=Symbol("stream");function createIterResult(s,o){return{value:s,done:o}}function readAndResolve(s){var o=s[w];if(null!==o){var i=s[$].read();null!==i&&(s[L]=null,s[w]=null,s[x]=null,o(createIterResult(i,!1)))}}function onReadable(s){u.nextTick(readAndResolve,s)}var U=Object.getPrototypeOf((function(){})),V=Object.setPrototypeOf((_defineProperty(a={get stream(){return this[$]},next:function next(){var s=this,o=this[C];if(null!==o)return Promise.reject(o);if(this[j])return 
Promise.resolve(createIterResult(void 0,!0));if(this[$].destroyed)return new Promise((function(o,i){u.nextTick((function(){s[C]?i(s[C]):o(createIterResult(void 0,!0))}))}));var i,a=this[L];if(a)i=new Promise(function wrapForNext(s,o){return function(i,a){s.then((function(){o[j]?i(createIterResult(void 0,!0)):o[B](i,a)}),a)}}(a,this));else{var _=this[$].read();if(null!==_)return Promise.resolve(createIterResult(_,!1));i=new Promise(this[B])}return this[L]=i,i}},Symbol.asyncIterator,(function(){return this})),_defineProperty(a,"return",(function _return(){var s=this;return new Promise((function(o,i){s[$].destroy(null,(function(s){s?i(s):o(createIterResult(void 0,!0))}))}))})),a),U);s.exports=function createReadableStreamAsyncIterator(s){var o,i=Object.create(V,(_defineProperty(o={},$,{value:s,writable:!0}),_defineProperty(o,w,{value:null,writable:!0}),_defineProperty(o,x,{value:null,writable:!0}),_defineProperty(o,C,{value:null,writable:!0}),_defineProperty(o,j,{value:s._readableState.endEmitted,writable:!0}),_defineProperty(o,B,{value:function value(s,o){var a=i[$].read();a?(i[L]=null,i[w]=null,i[x]=null,s(createIterResult(a,!1))):(i[w]=s,i[x]=o)},writable:!0}),o));return i[L]=null,_(s,(function(s){if(s&&"ERR_STREAM_PREMATURE_CLOSE"!==s.code){var o=i[x];return null!==o&&(i[L]=null,i[w]=null,i[x]=null,o(s)),void(i[C]=s)}var a=i[w];null!==a&&(i[L]=null,i[w]=null,i[x]=null,a(createIterResult(void 0,!0))),i[j]=!0})),s.on("readable",onReadable.bind(null,i)),i}},3110:(s,o,i)=>{const a=i(5187),u=i(85015),_=i(98023),w=i(53812),x=i(23805),C=i(85105),j=i(86804);class Namespace{constructor(s){this.elementMap={},this.elementDetection=[],this.Element=j.Element,this.KeyValuePair=j.KeyValuePair,s&&s.noDefault||this.useDefault(),this._attributeElementKeys=[],this._attributeElementArrayKeys=[]}use(s){return s.namespace&&s.namespace({base:this}),s.load&&s.load({base:this}),this}useDefault(){return 
this.register("null",j.NullElement).register("string",j.StringElement).register("number",j.NumberElement).register("boolean",j.BooleanElement).register("array",j.ArrayElement).register("object",j.ObjectElement).register("member",j.MemberElement).register("ref",j.RefElement).register("link",j.LinkElement),this.detect(a,j.NullElement,!1).detect(u,j.StringElement,!1).detect(_,j.NumberElement,!1).detect(w,j.BooleanElement,!1).detect(Array.isArray,j.ArrayElement,!1).detect(x,j.ObjectElement,!1),this}register(s,o){return this._elements=void 0,this.elementMap[s]=o,this}unregister(s){return this._elements=void 0,delete this.elementMap[s],this}detect(s,o,i){return void 0===i||i?this.elementDetection.unshift([s,o]):this.elementDetection.push([s,o]),this}toElement(s){if(s instanceof this.Element)return s;let o;for(let i=0;i{const o=s[0].toUpperCase()+s.substr(1);this._elements[o]=this.elementMap[s]}))),this._elements}get serialiser(){return new C(this)}}C.prototype.Namespace=Namespace,s.exports=Namespace},3121:(s,o,i)=>{"use strict";var a=i(65482),u=Math.min;s.exports=function(s){var o=a(s);return o>0?u(o,9007199254740991):0}},3209:(s,o,i)=>{var a=i(91596),u=i(53320),_=i(36306),w="__lodash_placeholder__",x=128,C=Math.min;s.exports=function mergeData(s,o){var i=s[1],j=o[1],L=i|j,B=L<131,$=j==x&&8==i||j==x&&256==i&&s[7].length<=o[8]||384==j&&o[7].length<=o[8]&&8==i;if(!B&&!$)return s;1&j&&(s[2]=o[2],L|=1&i?0:4);var U=o[3];if(U){var V=s[3];s[3]=V?a(V,U,o[4]):U,s[4]=V?_(s[3],w):o[4]}return(U=o[5])&&(V=s[5],s[5]=V?u(V,U,o[6]):U,s[6]=V?_(s[5],w):o[6]),(U=o[7])&&(s[7]=U),j&x&&(s[8]=null==s[8]?o[8]:C(s[8],o[8])),null==s[9]&&(s[9]=o[9]),s[0]=o[0],s[1]=L,s}},3650:(s,o,i)=>{var a=i(74335)(Object.keys,Object);s.exports=a},3656:(s,o,i)=>{s=i.nmd(s);var a=i(9325),u=i(89935),_=o&&!o.nodeType&&o,w=_&&s&&!s.nodeType&&s,x=w&&w.exports===_?a.Buffer:void 0,C=(x?x.isBuffer:void 0)||u;s.exports=C},4509:(s,o,i)=>{var a=i(12651);s.exports=function mapCacheHas(s){return 
a(this,s).has(s)}},4640:s=>{"use strict";var o=String;s.exports=function(s){try{return o(s)}catch(s){return"Object"}}},4664:(s,o,i)=>{var a=i(79770),u=i(63345),_=Object.prototype.propertyIsEnumerable,w=Object.getOwnPropertySymbols,x=w?function(s){return null==s?[]:(s=Object(s),a(w(s),(function(o){return _.call(s,o)})))}:u;s.exports=x},4901:(s,o,i)=>{var a=i(72552),u=i(30294),_=i(40346),w={};w["[object Float32Array]"]=w["[object Float64Array]"]=w["[object Int8Array]"]=w["[object Int16Array]"]=w["[object Int32Array]"]=w["[object Uint8Array]"]=w["[object Uint8ClampedArray]"]=w["[object Uint16Array]"]=w["[object Uint32Array]"]=!0,w["[object Arguments]"]=w["[object Array]"]=w["[object ArrayBuffer]"]=w["[object Boolean]"]=w["[object DataView]"]=w["[object Date]"]=w["[object Error]"]=w["[object Function]"]=w["[object Map]"]=w["[object Number]"]=w["[object Object]"]=w["[object RegExp]"]=w["[object Set]"]=w["[object String]"]=w["[object WeakMap]"]=!1,s.exports=function baseIsTypedArray(s){return _(s)&&u(s.length)&&!!w[a(s)]}},4993:(s,o,i)=>{"use strict";var a=i(16946),u=i(74239);s.exports=function(s){return a(u(s))}},5187:s=>{s.exports=function isNull(s){return null===s}},5419:s=>{s.exports=function(s,o,i,a){var u=new Blob(void 0!==a?[a,s]:[s],{type:i||"application/octet-stream"});if(void 0!==window.navigator.msSaveBlob)window.navigator.msSaveBlob(u,o);else{var _=window.URL&&window.URL.createObjectURL?window.URL.createObjectURL(u):window.webkitURL.createObjectURL(u),w=document.createElement("a");w.style.display="none",w.href=_,w.setAttribute("download",o),void 0===w.download&&w.setAttribute("target","_blank"),document.body.appendChild(w),w.click(),setTimeout((function(){document.body.removeChild(w),window.URL.revokeObjectURL(_)}),200)}}},5556:(s,o,i)=>{s.exports=i(2694)()},5861:(s,o,i)=>{var a=i(55580),u=i(68223),_=i(32804),w=i(76545),x=i(28303),C=i(72552),j=i(47473),L="[object Map]",B="[object Promise]",$="[object Set]",U="[object WeakMap]",V="[object 
DataView]",z=j(a),Y=j(u),Z=j(_),ee=j(w),ie=j(x),ae=C;(a&&ae(new a(new ArrayBuffer(1)))!=V||u&&ae(new u)!=L||_&&ae(_.resolve())!=B||w&&ae(new w)!=$||x&&ae(new x)!=U)&&(ae=function(s){var o=C(s),i="[object Object]"==o?s.constructor:void 0,a=i?j(i):"";if(a)switch(a){case z:return V;case Y:return L;case Z:return B;case ee:return $;case ie:return U}return o}),s.exports=ae},6048:s=>{s.exports=function negate(s){if("function"!=typeof s)throw new TypeError("Expected a function");return function(){var o=arguments;switch(o.length){case 0:return!s.call(this);case 1:return!s.call(this,o[0]);case 2:return!s.call(this,o[0],o[1]);case 3:return!s.call(this,o[0],o[1],o[2])}return!s.apply(this,o)}}},6188:s=>{"use strict";s.exports=Math.max},6205:s=>{s.exports={ROOT:0,GROUP:1,POSITION:2,SET:3,RANGE:4,REPETITION:5,REFERENCE:6,CHAR:7}},6233:(s,o,i)=>{const a=i(6048),u=i(10316),_=i(92340);class ArrayElement extends u{constructor(s,o,i){super(s||[],o,i),this.element="array"}primitive(){return"array"}get(s){return this.content[s]}getValue(s){const o=this.get(s);if(o)return o.toValue()}getIndex(s){return this.content[s]}set(s,o){return this.content[s]=this.refract(o),this}remove(s){const o=this.content.splice(s,1);return o.length?o[0]:null}map(s,o){return this.content.map(s,o)}flatMap(s,o){return this.map(s,o).reduce(((s,o)=>s.concat(o)),[])}compactMap(s,o){const i=[];return this.forEach((a=>{const u=s.bind(o)(a);u&&i.push(u)})),i}filter(s,o){return new _(this.content.filter(s,o))}reject(s,o){return this.filter(a(s),o)}reduce(s,o){let i,a;void 0!==o?(i=0,a=this.refract(o)):(i=1,a="object"===this.primitive()?this.first.value:this.first);for(let o=i;o{s.bind(o)(i,this.refract(a))}))}shift(){return this.content.shift()}unshift(s){this.content.unshift(this.refract(s))}push(s){return this.content.push(this.refract(s)),this}add(s){this.push(s)}findElements(s,o){const i=o||{},a=!!i.recursive,u=void 0===i.results?[]:i.results;return this.forEach(((o,i,_)=>{a&&void 
0!==o.findElements&&o.findElements(s,{results:u,recursive:a}),s(o,i,_)&&u.push(o)})),u}find(s){return new _(this.findElements(s,{recursive:!0}))}findByElement(s){return this.find((o=>o.element===s))}findByClass(s){return this.find((o=>o.classes.includes(s)))}getById(s){return this.find((o=>o.id.toValue()===s)).first}includes(s){return this.content.some((o=>o.equals(s)))}contains(s){return this.includes(s)}empty(){return new this.constructor([])}"fantasy-land/empty"(){return this.empty()}concat(s){return new this.constructor(this.content.concat(s.content))}"fantasy-land/concat"(s){return this.concat(s)}"fantasy-land/map"(s){return new this.constructor(this.map(s))}"fantasy-land/chain"(s){return this.map((o=>s(o)),this).reduce(((s,o)=>s.concat(o)),this.empty())}"fantasy-land/filter"(s){return new this.constructor(this.content.filter(s))}"fantasy-land/reduce"(s,o){return this.content.reduce(s,o)}get length(){return this.content.length}get isEmpty(){return 0===this.content.length}get first(){return this.getIndex(0)}get second(){return this.getIndex(1)}get last(){return this.getIndex(this.length-1)}}ArrayElement.empty=function empty(){return new this},ArrayElement["fantasy-land/empty"]=ArrayElement.empty,"undefined"!=typeof Symbol&&(ArrayElement.prototype[Symbol.iterator]=function symbol(){return this.content[Symbol.iterator]()}),s.exports=ArrayElement},6499:(s,o,i)=>{"use strict";var a=i(1907),u=0,_=Math.random(),w=a(1..toString);s.exports=function(s){return"Symbol("+(void 0===s?"":s)+")_"+w(++u+_,36)}},6549:s=>{"use strict";s.exports=Object.getOwnPropertyDescriptor},6925:s=>{"use strict";s.exports="SECRET_DO_NOT_PASS_THIS_OR_YOU_WILL_BE_FIRED"},7057:(s,o,i)=>{"use strict";var a=i(11470).charAt,u=i(90160),_=i(64932),w=i(60183),x=i(59550),C="String Iterator",j=_.set,L=_.getterFor(C);w(String,"String",(function(s){j(this,{type:C,string:u(s),index:0})}),(function next(){var s,o=L(this),i=o.string,u=o.index;return u>=i.length?x(void 
0,!0):(s=a(i,u),o.index+=s.length,x(s,!1))}))},7176:(s,o,i)=>{"use strict";var a,u=i(73126),_=i(75795);try{a=[].__proto__===Array.prototype}catch(s){if(!s||"object"!=typeof s||!("code"in s)||"ERR_PROTO_ACCESS"!==s.code)throw s}var w=!!a&&_&&_(Object.prototype,"__proto__"),x=Object,C=x.getPrototypeOf;s.exports=w&&"function"==typeof w.get?u([w.get]):"function"==typeof C&&function getDunder(s){return C(null==s?s:x(s))}},7309:(s,o,i)=>{var a=i(62006)(i(24713));s.exports=a},7376:s=>{"use strict";s.exports=!0},7463:(s,o,i)=>{"use strict";var a=i(98828),u=i(62250),_=/#|\.prototype\./,isForced=function(s,o){var i=x[w(s)];return i===j||i!==C&&(u(o)?a(o):!!o)},w=isForced.normalize=function(s){return String(s).replace(_,".").toLowerCase()},x=isForced.data={},C=isForced.NATIVE="N",j=isForced.POLYFILL="P";s.exports=isForced},7666:(s,o,i)=>{var a=i(84851),u=i(953);function _extends(){var o;return s.exports=_extends=a?u(o=a).call(o):function(s){for(var o=1;o{const a=i(6205);o.wordBoundary=()=>({type:a.POSITION,value:"b"}),o.nonWordBoundary=()=>({type:a.POSITION,value:"B"}),o.begin=()=>({type:a.POSITION,value:"^"}),o.end=()=>({type:a.POSITION,value:"$"})},8068:s=>{"use strict";var o=(()=>{var s=Object.defineProperty,o=Object.getOwnPropertyDescriptor,i=Object.getOwnPropertyNames,a=Object.getOwnPropertySymbols,u=Object.prototype.hasOwnProperty,_=Object.prototype.propertyIsEnumerable,__defNormalProp=(o,i,a)=>i in o?s(o,i,{enumerable:!0,configurable:!0,writable:!0,value:a}):o[i]=a,__spreadValues=(s,o)=>{for(var i in o||(o={}))u.call(o,i)&&__defNormalProp(s,i,o[i]);if(a)for(var i of a(o))_.call(o,i)&&__defNormalProp(s,i,o[i]);return s},__publicField=(s,o,i)=>__defNormalProp(s,"symbol"!=typeof o?o+"":o,i),w={};((o,i)=>{for(var a in i)s(o,a,{get:i[a],enumerable:!0})})(w,{DEFAULT_OPTIONS:()=>C,DEFAULT_UUID_LENGTH:()=>x,default:()=>B});var x=6,C={dictionary:"alphanum",shuffle:!0,debug:!1,length:x,counter:0},j=class 
_ShortUniqueId{constructor(s={}){__publicField(this,"counter"),__publicField(this,"debug"),__publicField(this,"dict"),__publicField(this,"version"),__publicField(this,"dictIndex",0),__publicField(this,"dictRange",[]),__publicField(this,"lowerBound",0),__publicField(this,"upperBound",0),__publicField(this,"dictLength",0),__publicField(this,"uuidLength"),__publicField(this,"_digit_first_ascii",48),__publicField(this,"_digit_last_ascii",58),__publicField(this,"_alpha_lower_first_ascii",97),__publicField(this,"_alpha_lower_last_ascii",123),__publicField(this,"_hex_last_ascii",103),__publicField(this,"_alpha_upper_first_ascii",65),__publicField(this,"_alpha_upper_last_ascii",91),__publicField(this,"_number_dict_ranges",{digits:[this._digit_first_ascii,this._digit_last_ascii]}),__publicField(this,"_alpha_dict_ranges",{lowerCase:[this._alpha_lower_first_ascii,this._alpha_lower_last_ascii],upperCase:[this._alpha_upper_first_ascii,this._alpha_upper_last_ascii]}),__publicField(this,"_alpha_lower_dict_ranges",{lowerCase:[this._alpha_lower_first_ascii,this._alpha_lower_last_ascii]}),__publicField(this,"_alpha_upper_dict_ranges",{upperCase:[this._alpha_upper_first_ascii,this._alpha_upper_last_ascii]}),__publicField(this,"_alphanum_dict_ranges",{digits:[this._digit_first_ascii,this._digit_last_ascii],lowerCase:[this._alpha_lower_first_ascii,this._alpha_lower_last_ascii],upperCase:[this._alpha_upper_first_ascii,this._alpha_upper_last_ascii]}),__publicField(this,"_alphanum_lower_dict_ranges",{digits:[this._digit_first_ascii,this._digit_last_ascii],lowerCase:[this._alpha_lower_first_ascii,this._alpha_lower_last_ascii]}),__publicField(this,"_alphanum_upper_dict_ranges",{digits:[this._digit_first_ascii,this._digit_last_ascii],upperCase:[this._alpha_upper_first_ascii,this._alpha_upper_last_ascii]}),__publicField(this,"_hex_dict_ranges",{decDigits:[this._digit_first_ascii,this._digit_last_ascii],alphaDigits:[this._alpha_lower_first_ascii,this._hex_last_ascii]}),__publicField(this,"_dict
_ranges",{_number_dict_ranges:this._number_dict_ranges,_alpha_dict_ranges:this._alpha_dict_ranges,_alpha_lower_dict_ranges:this._alpha_lower_dict_ranges,_alpha_upper_dict_ranges:this._alpha_upper_dict_ranges,_alphanum_dict_ranges:this._alphanum_dict_ranges,_alphanum_lower_dict_ranges:this._alphanum_lower_dict_ranges,_alphanum_upper_dict_ranges:this._alphanum_upper_dict_ranges,_hex_dict_ranges:this._hex_dict_ranges}),__publicField(this,"log",((...s)=>{const o=[...s];o[0]="[short-unique-id] ".concat(s[0]),!0!==this.debug||"undefined"==typeof console||null===console||console.log(...o)})),__publicField(this,"_normalizeDictionary",((s,o)=>{let i;if(s&&Array.isArray(s)&&s.length>1)i=s;else{i=[],this.dictIndex=0;const o="_".concat(s,"_dict_ranges"),a=this._dict_ranges[o];let u=0;for(const[,s]of Object.entries(a)){const[o,i]=s;u+=Math.abs(i-o)}i=new Array(u);let _=0;for(const[,s]of Object.entries(a)){this.dictRange=s,this.lowerBound=this.dictRange[0],this.upperBound=this.dictRange[1];const o=this.lowerBound<=this.upperBound,a=this.lowerBound,u=this.upperBound;if(o)for(let s=a;su;s--)i[_++]=String.fromCharCode(s),this.dictIndex=s}i.length=_}if(o){for(let s=i.length-1;s>0;s--){const o=Math.floor(Math.random()*(s+1));[i[s],i[o]]=[i[o],i[s]]}}return i})),__publicField(this,"setDictionary",((s,o)=>{this.dict=this._normalizeDictionary(s,o),this.dictLength=this.dict.length,this.setCounter(0)})),__publicField(this,"seq",(()=>this.sequentialUUID())),__publicField(this,"sequentialUUID",(()=>{const s=this.dictLength,o=this.dict;let i=this.counter;const a=[];do{const u=i%s;i=Math.trunc(i/s),a.push(o[u])}while(0!==i);const u=a.join("");return this.counter+=1,u})),__publicField(this,"rnd",((s=this.uuidLength||x)=>this.randomUUID(s))),__publicField(this,"randomUUID",((s=this.uuidLength||x)=>{if(null==s||s<1)throw new Error("Invalid UUID Length Provided");const o=new Array(s),i=this.dictLength,a=this.dict;for(let 
u=0;uthis.formattedUUID(s,o))),__publicField(this,"formattedUUID",((s,o)=>{const i={$r:this.randomUUID,$s:this.sequentialUUID,$t:this.stamp};return s.replace(/\$[rs]\d{0,}|\$t0|\$t[1-9]\d{1,}/g,(s=>{const a=s.slice(0,2),u=Number.parseInt(s.slice(2),10);return"$s"===a?i[a]().padStart(u,"0"):"$t"===a&&o?i[a](u,o):i[a](u)}))})),__publicField(this,"availableUUIDs",((s=this.uuidLength)=>Number.parseFloat(([...new Set(this.dict)].length**s).toFixed(0)))),__publicField(this,"_collisionCache",new Map),__publicField(this,"approxMaxBeforeCollision",((s=this.availableUUIDs(this.uuidLength))=>{const o=s,i=this._collisionCache.get(o);if(void 0!==i)return i;const a=Number.parseFloat(Math.sqrt(Math.PI/2*s).toFixed(20));return this._collisionCache.set(o,a),a})),__publicField(this,"collisionProbability",((s=this.availableUUIDs(this.uuidLength),o=this.uuidLength)=>Number.parseFloat((this.approxMaxBeforeCollision(s)/this.availableUUIDs(o)).toFixed(20)))),__publicField(this,"uniqueness",((s=this.availableUUIDs(this.uuidLength))=>{const o=Number.parseFloat((1-this.approxMaxBeforeCollision(s)/s).toFixed(20));return o>1?1:o<0?0:o})),__publicField(this,"getVersion",(()=>this.version)),__publicField(this,"stamp",((s,o)=>{const i=Math.floor(+(o||new Date)/1e3).toString(16);if("number"==typeof s&&0===s)return i;if("number"!=typeof s||s<10)throw new Error(["Param finalLength must be a number greater than or equal to 10,","or 0 if you want the raw hexadecimal timestamp"].join("\n"));const a=s-9,u=Math.round(Math.random()*(a>15?15:a)),_=this.randomUUID(a);return"".concat(_.substring(0,u)).concat(i).concat(_.substring(u)).concat(u.toString(16))})),__publicField(this,"parseStamp",((s,o)=>{if(o&&!/t0|t[1-9]\d{1,}/.test(o))throw new Error("Cannot extract date from a formated UUID with no timestamp in the format");const i=o?o.replace(/\$[rs]\d{0,}|\$t0|\$t[1-9]\d{1,}/g,(s=>{const 
o={$r:s=>[...Array(s)].map((()=>"r")).join(""),$s:s=>[...Array(s)].map((()=>"s")).join(""),$t:s=>[...Array(s)].map((()=>"t")).join("")},i=s.slice(0,2),a=Number.parseInt(s.slice(2),10);return o[i](a)})).replace(/^(.*?)(t{8,})(.*)$/g,((o,i,a)=>s.substring(i.length,i.length+a.length))):s;if(8===i.length)return new Date(1e3*Number.parseInt(i,16));if(i.length<10)throw new Error("Stamp length invalid");const a=Number.parseInt(i.substring(i.length-1),16);return new Date(1e3*Number.parseInt(i.substring(a,a+8),16))})),__publicField(this,"setCounter",(s=>{this.counter=s})),__publicField(this,"validate",((s,o)=>{const i=o?this._normalizeDictionary(o):this.dict;return s.split("").every((s=>i.includes(s)))}));const o=__spreadValues(__spreadValues({},C),s);this.counter=0,this.debug=!1,this.dict=[],this.version="5.3.2";const{dictionary:i,shuffle:a,length:u,counter:_}=o;this.uuidLength=u,this.setDictionary(i,a),this.setCounter(_),this.debug=o.debug,this.log(this.dict),this.log("Generator instantiated with Dictionary Size ".concat(this.dictLength," and counter set to ").concat(this.counter)),this.log=this.log.bind(this),this.setDictionary=this.setDictionary.bind(this),this.setCounter=this.setCounter.bind(this),this.seq=this.seq.bind(this),this.sequentialUUID=this.sequentialUUID.bind(this),this.rnd=this.rnd.bind(this),this.randomUUID=this.randomUUID.bind(this),this.fmt=this.fmt.bind(this),this.formattedUUID=this.formattedUUID.bind(this),this.availableUUIDs=this.availableUUIDs.bind(this),this.approxMaxBeforeCollision=this.approxMaxBeforeCollision.bind(this),this.collisionProbability=this.collisionProbability.bind(this),this.uniqueness=this.uniqueness.bind(this),this.getVersion=this.getVersion.bind(this),this.stamp=this.stamp.bind(this),this.parseStamp=this.parseStamp.bind(this)}};__publicField(j,"default",j);var L,B=j;return L=w,((a,_,w,x)=>{if(_&&"object"==typeof _||"function"==typeof _)for(let C of 
i(_))u.call(a,C)||C===w||s(a,C,{get:()=>_[C],enumerable:!(x=o(_,C))||x.enumerable});return a})(s({},"__esModule",{value:!0}),L)})();s.exports=o.default,"undefined"!=typeof window&&(o=o.default)},9325:(s,o,i)=>{var a=i(34840),u="object"==typeof self&&self&&self.Object===Object&&self,_=a||u||Function("return this")();s.exports=_},9404:function(s){s.exports=function(){"use strict";var s=Array.prototype.slice;function createClass(s,o){o&&(s.prototype=Object.create(o.prototype)),s.prototype.constructor=s}function Iterable(s){return isIterable(s)?s:Seq(s)}function KeyedIterable(s){return isKeyed(s)?s:KeyedSeq(s)}function IndexedIterable(s){return isIndexed(s)?s:IndexedSeq(s)}function SetIterable(s){return isIterable(s)&&!isAssociative(s)?s:SetSeq(s)}function isIterable(s){return!(!s||!s[o])}function isKeyed(s){return!(!s||!s[i])}function isIndexed(s){return!(!s||!s[a])}function isAssociative(s){return isKeyed(s)||isIndexed(s)}function isOrdered(s){return!(!s||!s[u])}createClass(KeyedIterable,Iterable),createClass(IndexedIterable,Iterable),createClass(SetIterable,Iterable),Iterable.isIterable=isIterable,Iterable.isKeyed=isKeyed,Iterable.isIndexed=isIndexed,Iterable.isAssociative=isAssociative,Iterable.isOrdered=isOrdered,Iterable.Keyed=KeyedIterable,Iterable.Indexed=IndexedIterable,Iterable.Set=SetIterable;var o="@@__IMMUTABLE_ITERABLE__@@",i="@@__IMMUTABLE_KEYED__@@",a="@@__IMMUTABLE_INDEXED__@@",u="@@__IMMUTABLE_ORDERED__@@",_="delete",w=5,x=1<>>0;if(""+i!==o||4294967295===i)return NaN;o=i}return o<0?ensureSize(s)+o:o}function returnTrue(){return!0}function wholeSlice(s,o,i){return(0===s||void 0!==i&&s<=-i)&&(void 0===o||void 0!==i&&o>=i)}function resolveBegin(s,o){return resolveIndex(s,o,0)}function resolveEnd(s,o){return resolveIndex(s,o,o)}function resolveIndex(s,o,i){return void 0===s?i:s<0?Math.max(0,o+s):void 0===o?s:Math.min(o,s)}var $=0,U=1,V=2,z="function"==typeof Symbol&&Symbol.iterator,Y="@@iterator",Z=z||Y;function Iterator(s){this.next=s}function 
iteratorValue(s,o,i,a){var u=0===s?o:1===s?i:[o,i];return a?a.value=u:a={value:u,done:!1},a}function iteratorDone(){return{value:void 0,done:!0}}function hasIterator(s){return!!getIteratorFn(s)}function isIterator(s){return s&&"function"==typeof s.next}function getIterator(s){var o=getIteratorFn(s);return o&&o.call(s)}function getIteratorFn(s){var o=s&&(z&&s[z]||s[Y]);if("function"==typeof o)return o}function isArrayLike(s){return s&&"number"==typeof s.length}function Seq(s){return null==s?emptySequence():isIterable(s)?s.toSeq():seqFromValue(s)}function KeyedSeq(s){return null==s?emptySequence().toKeyedSeq():isIterable(s)?isKeyed(s)?s.toSeq():s.fromEntrySeq():keyedSeqFromValue(s)}function IndexedSeq(s){return null==s?emptySequence():isIterable(s)?isKeyed(s)?s.entrySeq():s.toIndexedSeq():indexedSeqFromValue(s)}function SetSeq(s){return(null==s?emptySequence():isIterable(s)?isKeyed(s)?s.entrySeq():s:indexedSeqFromValue(s)).toSetSeq()}Iterator.prototype.toString=function(){return"[Iterator]"},Iterator.KEYS=$,Iterator.VALUES=U,Iterator.ENTRIES=V,Iterator.prototype.inspect=Iterator.prototype.toSource=function(){return this.toString()},Iterator.prototype[Z]=function(){return this},createClass(Seq,Iterable),Seq.of=function(){return Seq(arguments)},Seq.prototype.toSeq=function(){return this},Seq.prototype.toString=function(){return this.__toString("Seq {","}")},Seq.prototype.cacheResult=function(){return!this._cache&&this.__iterateUncached&&(this._cache=this.entrySeq().toArray(),this.size=this._cache.length),this},Seq.prototype.__iterate=function(s,o){return seqIterate(this,s,o,!0)},Seq.prototype.__iterator=function(s,o){return seqIterator(this,s,o,!0)},createClass(KeyedSeq,Seq),KeyedSeq.prototype.toKeyedSeq=function(){return this},createClass(IndexedSeq,Seq),IndexedSeq.of=function(){return IndexedSeq(arguments)},IndexedSeq.prototype.toIndexedSeq=function(){return this},IndexedSeq.prototype.toString=function(){return this.__toString("Seq 
[","]")},IndexedSeq.prototype.__iterate=function(s,o){return seqIterate(this,s,o,!1)},IndexedSeq.prototype.__iterator=function(s,o){return seqIterator(this,s,o,!1)},createClass(SetSeq,Seq),SetSeq.of=function(){return SetSeq(arguments)},SetSeq.prototype.toSetSeq=function(){return this},Seq.isSeq=isSeq,Seq.Keyed=KeyedSeq,Seq.Set=SetSeq,Seq.Indexed=IndexedSeq;var ee,ie,ae,ce="@@__IMMUTABLE_SEQ__@@";function ArraySeq(s){this._array=s,this.size=s.length}function ObjectSeq(s){var o=Object.keys(s);this._object=s,this._keys=o,this.size=o.length}function IterableSeq(s){this._iterable=s,this.size=s.length||s.size}function IteratorSeq(s){this._iterator=s,this._iteratorCache=[]}function isSeq(s){return!(!s||!s[ce])}function emptySequence(){return ee||(ee=new ArraySeq([]))}function keyedSeqFromValue(s){var o=Array.isArray(s)?new ArraySeq(s).fromEntrySeq():isIterator(s)?new IteratorSeq(s).fromEntrySeq():hasIterator(s)?new IterableSeq(s).fromEntrySeq():"object"==typeof s?new ObjectSeq(s):void 0;if(!o)throw new TypeError("Expected Array or iterable object of [k, v] entries, or keyed object: "+s);return o}function indexedSeqFromValue(s){var o=maybeIndexedSeqFromValue(s);if(!o)throw new TypeError("Expected Array or iterable object of values: "+s);return o}function seqFromValue(s){var o=maybeIndexedSeqFromValue(s)||"object"==typeof s&&new ObjectSeq(s);if(!o)throw new TypeError("Expected Array or iterable object of values, or keyed object: "+s);return o}function maybeIndexedSeqFromValue(s){return isArrayLike(s)?new ArraySeq(s):isIterator(s)?new IteratorSeq(s):hasIterator(s)?new IterableSeq(s):void 0}function seqIterate(s,o,i,a){var u=s._cache;if(u){for(var _=u.length-1,w=0;w<=_;w++){var x=u[i?_-w:w];if(!1===o(x[1],a?x[0]:w,s))return w+1}return w}return s.__iterateUncached(o,i)}function seqIterator(s,o,i,a){var u=s._cache;if(u){var _=u.length-1,w=0;return new Iterator((function(){var s=u[i?_-w:w];return w++>_?iteratorDone():iteratorValue(o,a?s[0]:w-1,s[1])}))}return 
s.__iteratorUncached(o,i)}function fromJS(s,o){return o?fromJSWith(o,s,"",{"":s}):fromJSDefault(s)}function fromJSWith(s,o,i,a){return Array.isArray(o)?s.call(a,i,IndexedSeq(o).map((function(i,a){return fromJSWith(s,i,a,o)}))):isPlainObj(o)?s.call(a,i,KeyedSeq(o).map((function(i,a){return fromJSWith(s,i,a,o)}))):o}function fromJSDefault(s){return Array.isArray(s)?IndexedSeq(s).map(fromJSDefault).toList():isPlainObj(s)?KeyedSeq(s).map(fromJSDefault).toMap():s}function isPlainObj(s){return s&&(s.constructor===Object||void 0===s.constructor)}function is(s,o){if(s===o||s!=s&&o!=o)return!0;if(!s||!o)return!1;if("function"==typeof s.valueOf&&"function"==typeof o.valueOf){if((s=s.valueOf())===(o=o.valueOf())||s!=s&&o!=o)return!0;if(!s||!o)return!1}return!("function"!=typeof s.equals||"function"!=typeof o.equals||!s.equals(o))}function deepEqual(s,o){if(s===o)return!0;if(!isIterable(o)||void 0!==s.size&&void 0!==o.size&&s.size!==o.size||void 0!==s.__hash&&void 0!==o.__hash&&s.__hash!==o.__hash||isKeyed(s)!==isKeyed(o)||isIndexed(s)!==isIndexed(o)||isOrdered(s)!==isOrdered(o))return!1;if(0===s.size&&0===o.size)return!0;var i=!isAssociative(s);if(isOrdered(s)){var a=s.entries();return o.every((function(s,o){var u=a.next().value;return u&&is(u[1],s)&&(i||is(u[0],o))}))&&a.next().done}var u=!1;if(void 0===s.size)if(void 0===o.size)"function"==typeof s.cacheResult&&s.cacheResult();else{u=!0;var _=s;s=o,o=_}var w=!0,x=o.__iterate((function(o,a){if(i?!s.has(o):u?!is(o,s.get(a,j)):!is(s.get(a,j),o))return w=!1,!1}));return w&&s.size===x}function Repeat(s,o){if(!(this instanceof Repeat))return new Repeat(s,o);if(this._value=s,this.size=void 0===o?1/0:Math.max(0,o),0===this.size){if(ie)return ie;ie=this}}function invariant(s,o){if(!s)throw new Error(o)}function Range(s,o,i){if(!(this instanceof Range))return new Range(s,o,i);if(invariant(0!==i,"Cannot step a Range by 0"),s=s||0,void 0===o&&(o=1/0),i=void 
0===i?1:Math.abs(i),oa?iteratorDone():iteratorValue(s,u,i[o?a-u++:u++])}))},createClass(ObjectSeq,KeyedSeq),ObjectSeq.prototype.get=function(s,o){return void 0===o||this.has(s)?this._object[s]:o},ObjectSeq.prototype.has=function(s){return this._object.hasOwnProperty(s)},ObjectSeq.prototype.__iterate=function(s,o){for(var i=this._object,a=this._keys,u=a.length-1,_=0;_<=u;_++){var w=a[o?u-_:_];if(!1===s(i[w],w,this))return _+1}return _},ObjectSeq.prototype.__iterator=function(s,o){var i=this._object,a=this._keys,u=a.length-1,_=0;return new Iterator((function(){var w=a[o?u-_:_];return _++>u?iteratorDone():iteratorValue(s,w,i[w])}))},ObjectSeq.prototype[u]=!0,createClass(IterableSeq,IndexedSeq),IterableSeq.prototype.__iterateUncached=function(s,o){if(o)return this.cacheResult().__iterate(s,o);var i=getIterator(this._iterable),a=0;if(isIterator(i))for(var u;!(u=i.next()).done&&!1!==s(u.value,a++,this););return a},IterableSeq.prototype.__iteratorUncached=function(s,o){if(o)return this.cacheResult().__iterator(s,o);var i=getIterator(this._iterable);if(!isIterator(i))return new Iterator(iteratorDone);var a=0;return new Iterator((function(){var o=i.next();return o.done?o:iteratorValue(s,a++,o.value)}))},createClass(IteratorSeq,IndexedSeq),IteratorSeq.prototype.__iterateUncached=function(s,o){if(o)return this.cacheResult().__iterate(s,o);for(var i,a=this._iterator,u=this._iteratorCache,_=0;_=a.length){var o=i.next();if(o.done)return o;a[u]=o.value}return iteratorValue(s,u,a[u++])}))},createClass(Repeat,IndexedSeq),Repeat.prototype.toString=function(){return 0===this.size?"Repeat []":"Repeat [ "+this._value+" "+this.size+" times ]"},Repeat.prototype.get=function(s,o){return this.has(s)?this._value:o},Repeat.prototype.includes=function(s){return is(this._value,s)},Repeat.prototype.slice=function(s,o){var i=this.size;return wholeSlice(s,o,i)?this:new Repeat(this._value,resolveEnd(o,i)-resolveBegin(s,i))},Repeat.prototype.reverse=function(){return 
this},Repeat.prototype.indexOf=function(s){return is(this._value,s)?0:-1},Repeat.prototype.lastIndexOf=function(s){return is(this._value,s)?this.size:-1},Repeat.prototype.__iterate=function(s,o){for(var i=0;i=0&&o=0&&ii?iteratorDone():iteratorValue(s,_++,w)}))},Range.prototype.equals=function(s){return s instanceof Range?this._start===s._start&&this._end===s._end&&this._step===s._step:deepEqual(this,s)},createClass(Collection,Iterable),createClass(KeyedCollection,Collection),createClass(IndexedCollection,Collection),createClass(SetCollection,Collection),Collection.Keyed=KeyedCollection,Collection.Indexed=IndexedCollection,Collection.Set=SetCollection;var le="function"==typeof Math.imul&&-2===Math.imul(4294967295,2)?Math.imul:function imul(s,o){var i=65535&(s|=0),a=65535&(o|=0);return i*a+((s>>>16)*a+i*(o>>>16)<<16>>>0)|0};function smi(s){return s>>>1&1073741824|3221225471&s}function hash(s){if(!1===s||null==s)return 0;if("function"==typeof s.valueOf&&(!1===(s=s.valueOf())||null==s))return 0;if(!0===s)return 1;var o=typeof s;if("number"===o){if(s!=s||s===1/0)return 0;var i=0|s;for(i!==s&&(i^=4294967295*s);s>4294967295;)i^=s/=4294967295;return smi(i)}if("string"===o)return s.length>Se?cachedHashString(s):hashString(s);if("function"==typeof s.hashCode)return s.hashCode();if("object"===o)return hashJSObj(s);if("function"==typeof s.toString)return hashString(s.toString());throw new Error("Value type "+o+" cannot be hashed.")}function cachedHashString(s){var o=Pe[s];return void 0===o&&(o=hashString(s),xe===we&&(xe=0,Pe={}),xe++,Pe[s]=o),o}function hashString(s){for(var o=0,i=0;i0)switch(s.nodeType){case 1:return s.uniqueID;case 9:return s.documentElement&&s.documentElement.uniqueID}}var fe,ye="function"==typeof WeakMap;ye&&(fe=new WeakMap);var be=0,_e="__immutablehash__";"function"==typeof Symbol&&(_e=Symbol(_e));var Se=16,we=255,xe=0,Pe={};function assertNotInfinite(s){invariant(s!==1/0,"Cannot perform this action with an infinite size.")}function Map(s){return 
null==s?emptyMap():isMap(s)&&!isOrdered(s)?s:emptyMap().withMutations((function(o){var i=KeyedIterable(s);assertNotInfinite(i.size),i.forEach((function(s,i){return o.set(i,s)}))}))}function isMap(s){return!(!s||!s[Re])}createClass(Map,KeyedCollection),Map.of=function(){var o=s.call(arguments,0);return emptyMap().withMutations((function(s){for(var i=0;i=o.length)throw new Error("Missing value for key: "+o[i]);s.set(o[i],o[i+1])}}))},Map.prototype.toString=function(){return this.__toString("Map {","}")},Map.prototype.get=function(s,o){return this._root?this._root.get(0,void 0,s,o):o},Map.prototype.set=function(s,o){return updateMap(this,s,o)},Map.prototype.setIn=function(s,o){return this.updateIn(s,j,(function(){return o}))},Map.prototype.remove=function(s){return updateMap(this,s,j)},Map.prototype.deleteIn=function(s){return this.updateIn(s,(function(){return j}))},Map.prototype.update=function(s,o,i){return 1===arguments.length?s(this):this.updateIn([s],o,i)},Map.prototype.updateIn=function(s,o,i){i||(i=o,o=void 0);var a=updateInDeepMap(this,forceIterator(s),o,i);return a===j?void 0:a},Map.prototype.clear=function(){return 0===this.size?this:this.__ownerID?(this.size=0,this._root=null,this.__hash=void 0,this.__altered=!0,this):emptyMap()},Map.prototype.merge=function(){return mergeIntoMapWith(this,void 0,arguments)},Map.prototype.mergeWith=function(o){return mergeIntoMapWith(this,o,s.call(arguments,1))},Map.prototype.mergeIn=function(o){var i=s.call(arguments,1);return this.updateIn(o,emptyMap(),(function(s){return"function"==typeof s.merge?s.merge.apply(s,i):i[i.length-1]}))},Map.prototype.mergeDeep=function(){return mergeIntoMapWith(this,deepMerger,arguments)},Map.prototype.mergeDeepWith=function(o){var i=s.call(arguments,1);return mergeIntoMapWith(this,deepMergerWith(o),i)},Map.prototype.mergeDeepIn=function(o){var i=s.call(arguments,1);return this.updateIn(o,emptyMap(),(function(s){return"function"==typeof 
s.mergeDeep?s.mergeDeep.apply(s,i):i[i.length-1]}))},Map.prototype.sort=function(s){return OrderedMap(sortFactory(this,s))},Map.prototype.sortBy=function(s,o){return OrderedMap(sortFactory(this,o,s))},Map.prototype.withMutations=function(s){var o=this.asMutable();return s(o),o.wasAltered()?o.__ensureOwner(this.__ownerID):this},Map.prototype.asMutable=function(){return this.__ownerID?this:this.__ensureOwner(new OwnerID)},Map.prototype.asImmutable=function(){return this.__ensureOwner()},Map.prototype.wasAltered=function(){return this.__altered},Map.prototype.__iterator=function(s,o){return new MapIterator(this,s,o)},Map.prototype.__iterate=function(s,o){var i=this,a=0;return this._root&&this._root.iterate((function(o){return a++,s(o[1],o[0],i)}),o),a},Map.prototype.__ensureOwner=function(s){return s===this.__ownerID?this:s?makeMap(this.size,this._root,s,this.__hash):(this.__ownerID=s,this.__altered=!1,this)},Map.isMap=isMap;var Te,Re="@@__IMMUTABLE_MAP__@@",$e=Map.prototype;function ArrayMapNode(s,o){this.ownerID=s,this.entries=o}function BitmapIndexedNode(s,o,i){this.ownerID=s,this.bitmap=o,this.nodes=i}function HashArrayMapNode(s,o,i){this.ownerID=s,this.count=o,this.nodes=i}function HashCollisionNode(s,o,i){this.ownerID=s,this.keyHash=o,this.entries=i}function ValueNode(s,o,i){this.ownerID=s,this.keyHash=o,this.entry=i}function MapIterator(s,o,i){this._type=o,this._reverse=i,this._stack=s._root&&mapIteratorFrame(s._root)}function mapIteratorValue(s,o){return iteratorValue(s,o[0],o[1])}function mapIteratorFrame(s,o){return{node:s,index:0,__prev:o}}function makeMap(s,o,i,a){var u=Object.create($e);return u.size=s,u._root=o,u.__ownerID=i,u.__hash=a,u.__altered=!1,u}function emptyMap(){return Te||(Te=makeMap(0))}function updateMap(s,o,i){var a,u;if(s._root){var _=MakeRef(L),w=MakeRef(B);if(a=updateNode(s._root,s.__ownerID,0,void 0,o,i,_,w),!w.value)return s;u=s.size+(_.value?i===j?-1:1:0)}else{if(i===j)return s;u=1,a=new ArrayMapNode(s.__ownerID,[[o,i]])}return 
s.__ownerID?(s.size=u,s._root=a,s.__hash=void 0,s.__altered=!0,s):a?makeMap(u,a):emptyMap()}function updateNode(s,o,i,a,u,_,w,x){return s?s.update(o,i,a,u,_,w,x):_===j?s:(SetRef(x),SetRef(w),new ValueNode(o,a,[u,_]))}function isLeafNode(s){return s.constructor===ValueNode||s.constructor===HashCollisionNode}function mergeIntoNode(s,o,i,a,u){if(s.keyHash===a)return new HashCollisionNode(o,a,[s.entry,u]);var _,x=(0===i?s.keyHash:s.keyHash>>>i)&C,j=(0===i?a:a>>>i)&C;return new BitmapIndexedNode(o,1<>>=1)w[C]=1&i?o[_++]:void 0;return w[a]=u,new HashArrayMapNode(s,_+1,w)}function mergeIntoMapWith(s,o,i){for(var a=[],u=0;u>1&1431655765))+(s>>2&858993459))+(s>>4)&252645135,s+=s>>8,127&(s+=s>>16)}function setIn(s,o,i,a){var u=a?s:arrCopy(s);return u[o]=i,u}function spliceIn(s,o,i,a){var u=s.length+1;if(a&&o+1===u)return s[o]=i,s;for(var _=new Array(u),w=0,x=0;x=qe)return createNodes(s,C,a,u);var U=s&&s===this.ownerID,V=U?C:arrCopy(C);return $?x?L===B-1?V.pop():V[L]=V.pop():V[L]=[a,u]:V.push([a,u]),U?(this.entries=V,this):new ArrayMapNode(s,V)}},BitmapIndexedNode.prototype.get=function(s,o,i,a){void 0===o&&(o=hash(i));var u=1<<((0===s?o:o>>>s)&C),_=this.bitmap;return _&u?this.nodes[popCount(_&u-1)].get(s+w,o,i,a):a},BitmapIndexedNode.prototype.update=function(s,o,i,a,u,_,x){void 0===i&&(i=hash(a));var L=(0===o?i:i>>>o)&C,B=1<=ze)return expandNodes(s,z,$,L,Z);if(U&&!Z&&2===z.length&&isLeafNode(z[1^V]))return z[1^V];if(U&&Z&&1===z.length&&isLeafNode(Z))return Z;var ee=s&&s===this.ownerID,ie=U?Z?$:$^B:$|B,ae=U?Z?setIn(z,V,Z,ee):spliceOut(z,V,ee):spliceIn(z,V,Z,ee);return ee?(this.bitmap=ie,this.nodes=ae,this):new BitmapIndexedNode(s,ie,ae)},HashArrayMapNode.prototype.get=function(s,o,i,a){void 0===o&&(o=hash(i));var u=(0===s?o:o>>>s)&C,_=this.nodes[u];return _?_.get(s+w,o,i,a):a},HashArrayMapNode.prototype.update=function(s,o,i,a,u,_,x){void 0===i&&(i=hash(a));var L=(0===o?i:i>>>o)&C,B=u===j,$=this.nodes,U=$[L];if(B&&!U)return this;var 
V=updateNode(U,s,o+w,i,a,u,_,x);if(V===U)return this;var z=this.count;if(U){if(!V&&--z0&&a=0&&s>>o&C;if(a>=this.array.length)return new VNode([],s);var u,_=0===a;if(o>0){var x=this.array[a];if((u=x&&x.removeBefore(s,o-w,i))===x&&_)return this}if(_&&!u)return this;var j=editableVNode(this,s);if(!_)for(var L=0;L>>o&C;if(u>=this.array.length)return this;if(o>0){var _=this.array[u];if((a=_&&_.removeAfter(s,o-w,i))===_&&u===this.array.length-1)return this}var x=editableVNode(this,s);return x.array.splice(u+1),a&&(x.array[u]=a),x};var Xe,Qe,et={};function iterateList(s,o){var i=s._origin,a=s._capacity,u=getTailOffset(a),_=s._tail;return iterateNodeOrLeaf(s._root,s._level,0);function iterateNodeOrLeaf(s,o,i){return 0===o?iterateLeaf(s,i):iterateNode(s,o,i)}function iterateLeaf(s,w){var C=w===u?_&&_.array:s&&s.array,j=w>i?0:i-w,L=a-w;return L>x&&(L=x),function(){if(j===L)return et;var s=o?--L:j++;return C&&C[s]}}function iterateNode(s,u,_){var C,j=s&&s.array,L=_>i?0:i-_>>u,B=1+(a-_>>u);return B>x&&(B=x),function(){for(;;){if(C){var s=C();if(s!==et)return s;C=null}if(L===B)return et;var i=o?--B:L++;C=iterateNodeOrLeaf(j&&j[i],u-w,_+(i<=s.size||o<0)return s.withMutations((function(s){o<0?setListBounds(s,o).set(0,i):setListBounds(s,0,o+1).set(o,i)}));o+=s._origin;var a=s._tail,u=s._root,_=MakeRef(B);return o>=getTailOffset(s._capacity)?a=updateVNode(a,s.__ownerID,0,o,i,_):u=updateVNode(u,s.__ownerID,s._level,o,i,_),_.value?s.__ownerID?(s._root=u,s._tail=a,s.__hash=void 0,s.__altered=!0,s):makeList(s._origin,s._capacity,s._level,u,a):s}function updateVNode(s,o,i,a,u,_){var x,j=a>>>i&C,L=s&&j0){var B=s&&s.array[j],$=updateVNode(B,o,i-w,a,u,_);return $===B?s:((x=editableVNode(s,o)).array[j]=$,x)}return L&&s.array[j]===u?s:(SetRef(_),x=editableVNode(s,o),void 0===u&&j===x.array.length-1?x.array.pop():x.array[j]=u,x)}function editableVNode(s,o){return o&&s&&o===s.ownerID?s:new VNode(s?s.array.slice():[],o)}function listNodeFor(s,o){if(o>=getTailOffset(s._capacity))return 
s._tail;if(o<1<0;)i=i.array[o>>>a&C],a-=w;return i}}function setListBounds(s,o,i){void 0!==o&&(o|=0),void 0!==i&&(i|=0);var a=s.__ownerID||new OwnerID,u=s._origin,_=s._capacity,x=u+o,j=void 0===i?_:i<0?_+i:u+i;if(x===u&&j===_)return s;if(x>=j)return s.clear();for(var L=s._level,B=s._root,$=0;x+$<0;)B=new VNode(B&&B.array.length?[void 0,B]:[],a),$+=1<<(L+=w);$&&(x+=$,u+=$,j+=$,_+=$);for(var U=getTailOffset(_),V=getTailOffset(j);V>=1<U?new VNode([],a):z;if(z&&V>U&&x<_&&z.array.length){for(var Z=B=editableVNode(B,a),ee=L;ee>w;ee-=w){var ie=U>>>ee&C;Z=Z.array[ie]=editableVNode(Z.array[ie],a)}Z.array[U>>>w&C]=z}if(j<_&&(Y=Y&&Y.removeAfter(a,0,j)),x>=V)x-=V,j-=V,L=w,B=null,Y=Y&&Y.removeBefore(a,0,x);else if(x>u||V>>L&C;if(ae!==V>>>L&C)break;ae&&($+=(1<u&&(B=B.removeBefore(a,L,x-$)),B&&Vu&&(u=x.size),isIterable(w)||(x=x.map((function(s){return fromJS(s)}))),a.push(x)}return u>s.size&&(s=s.setSize(u)),mergeIntoCollectionWith(s,o,a)}function getTailOffset(s){return s>>w<=x&&w.size>=2*_.size?(a=(u=w.filter((function(s,o){return void 0!==s&&C!==o}))).toKeyedSeq().map((function(s){return s[0]})).flip().toMap(),s.__ownerID&&(a.__ownerID=u.__ownerID=s.__ownerID)):(a=_.remove(o),u=C===w.size-1?w.pop():w.set(C,void 0))}else if(L){if(i===w.get(C)[1])return s;a=_,u=w.set(C,[o,i])}else a=_.set(o,w.size),u=w.set(w.size,[o,i]);return s.__ownerID?(s.size=a.size,s._map=a,s._list=u,s.__hash=void 0,s):makeOrderedMap(a,u)}function ToKeyedSequence(s,o){this._iter=s,this._useKeys=o,this.size=s.size}function ToIndexedSequence(s){this._iter=s,this.size=s.size}function ToSetSequence(s){this._iter=s,this.size=s.size}function FromEntriesSequence(s){this._iter=s,this.size=s.size}function flipFactory(s){var o=makeSequence(s);return o._iter=s,o.size=s.size,o.flip=function(){return s},o.reverse=function(){var o=s.reverse.apply(this);return o.flip=function(){return s.reverse()},o},o.has=function(o){return s.includes(o)},o.includes=function(o){return 
s.has(o)},o.cacheResult=cacheResultThrough,o.__iterateUncached=function(o,i){var a=this;return s.__iterate((function(s,i){return!1!==o(i,s,a)}),i)},o.__iteratorUncached=function(o,i){if(o===V){var a=s.__iterator(o,i);return new Iterator((function(){var s=a.next();if(!s.done){var o=s.value[0];s.value[0]=s.value[1],s.value[1]=o}return s}))}return s.__iterator(o===U?$:U,i)},o}function mapFactory(s,o,i){var a=makeSequence(s);return a.size=s.size,a.has=function(o){return s.has(o)},a.get=function(a,u){var _=s.get(a,j);return _===j?u:o.call(i,_,a,s)},a.__iterateUncached=function(a,u){var _=this;return s.__iterate((function(s,u,w){return!1!==a(o.call(i,s,u,w),u,_)}),u)},a.__iteratorUncached=function(a,u){var _=s.__iterator(V,u);return new Iterator((function(){var u=_.next();if(u.done)return u;var w=u.value,x=w[0];return iteratorValue(a,x,o.call(i,w[1],x,s),u)}))},a}function reverseFactory(s,o){var i=makeSequence(s);return i._iter=s,i.size=s.size,i.reverse=function(){return s},s.flip&&(i.flip=function(){var o=flipFactory(s);return o.reverse=function(){return s.flip()},o}),i.get=function(i,a){return s.get(o?i:-1-i,a)},i.has=function(i){return s.has(o?i:-1-i)},i.includes=function(o){return s.includes(o)},i.cacheResult=cacheResultThrough,i.__iterate=function(o,i){var a=this;return s.__iterate((function(s,i){return o(s,i,a)}),!i)},i.__iterator=function(o,i){return s.__iterator(o,!i)},i}function filterFactory(s,o,i,a){var u=makeSequence(s);return a&&(u.has=function(a){var u=s.get(a,j);return u!==j&&!!o.call(i,u,a,s)},u.get=function(a,u){var _=s.get(a,j);return _!==j&&o.call(i,_,a,s)?_:u}),u.__iterateUncached=function(u,_){var w=this,x=0;return s.__iterate((function(s,_,C){if(o.call(i,s,_,C))return x++,u(s,a?_:x-1,w)}),_),x},u.__iteratorUncached=function(u,_){var w=s.__iterator(V,_),x=0;return new Iterator((function(){for(;;){var _=w.next();if(_.done)return _;var C=_.value,j=C[0],L=C[1];if(o.call(i,L,j,s))return iteratorValue(u,a?j:x++,L,_)}}))},u}function 
countByFactory(s,o,i){var a=Map().asMutable();return s.__iterate((function(u,_){a.update(o.call(i,u,_,s),0,(function(s){return s+1}))})),a.asImmutable()}function groupByFactory(s,o,i){var a=isKeyed(s),u=(isOrdered(s)?OrderedMap():Map()).asMutable();s.__iterate((function(_,w){u.update(o.call(i,_,w,s),(function(s){return(s=s||[]).push(a?[w,_]:_),s}))}));var _=iterableClass(s);return u.map((function(o){return reify(s,_(o))}))}function sliceFactory(s,o,i,a){var u=s.size;if(void 0!==o&&(o|=0),void 0!==i&&(i===1/0?i=u:i|=0),wholeSlice(o,i,u))return s;var _=resolveBegin(o,u),w=resolveEnd(i,u);if(_!=_||w!=w)return sliceFactory(s.toSeq().cacheResult(),o,i,a);var x,C=w-_;C==C&&(x=C<0?0:C);var j=makeSequence(s);return j.size=0===x?x:s.size&&x||void 0,!a&&isSeq(s)&&x>=0&&(j.get=function(o,i){return(o=wrapIndex(this,o))>=0&&ox)return iteratorDone();var s=u.next();return a||o===U?s:iteratorValue(o,C-1,o===$?void 0:s.value[1],s)}))},j}function takeWhileFactory(s,o,i){var a=makeSequence(s);return a.__iterateUncached=function(a,u){var _=this;if(u)return this.cacheResult().__iterate(a,u);var w=0;return s.__iterate((function(s,u,x){return o.call(i,s,u,x)&&++w&&a(s,u,_)})),w},a.__iteratorUncached=function(a,u){var _=this;if(u)return this.cacheResult().__iterator(a,u);var w=s.__iterator(V,u),x=!0;return new Iterator((function(){if(!x)return iteratorDone();var s=w.next();if(s.done)return s;var u=s.value,C=u[0],j=u[1];return o.call(i,j,C,_)?a===V?s:iteratorValue(a,C,j,s):(x=!1,iteratorDone())}))},a}function skipWhileFactory(s,o,i,a){var u=makeSequence(s);return u.__iterateUncached=function(u,_){var w=this;if(_)return this.cacheResult().__iterate(u,_);var x=!0,C=0;return s.__iterate((function(s,_,j){if(!x||!(x=o.call(i,s,_,j)))return C++,u(s,a?_:C-1,w)})),C},u.__iteratorUncached=function(u,_){var w=this;if(_)return this.cacheResult().__iterator(u,_);var x=s.__iterator(V,_),C=!0,j=0;return new Iterator((function(){var s,_,L;do{if((s=x.next()).done)return 
a||u===U?s:iteratorValue(u,j++,u===$?void 0:s.value[1],s);var B=s.value;_=B[0],L=B[1],C&&(C=o.call(i,L,_,w))}while(C);return u===V?s:iteratorValue(u,_,L,s)}))},u}function concatFactory(s,o){var i=isKeyed(s),a=[s].concat(o).map((function(s){return isIterable(s)?i&&(s=KeyedIterable(s)):s=i?keyedSeqFromValue(s):indexedSeqFromValue(Array.isArray(s)?s:[s]),s})).filter((function(s){return 0!==s.size}));if(0===a.length)return s;if(1===a.length){var u=a[0];if(u===s||i&&isKeyed(u)||isIndexed(s)&&isIndexed(u))return u}var _=new ArraySeq(a);return i?_=_.toKeyedSeq():isIndexed(s)||(_=_.toSetSeq()),(_=_.flatten(!0)).size=a.reduce((function(s,o){if(void 0!==s){var i=o.size;if(void 0!==i)return s+i}}),0),_}function flattenFactory(s,o,i){var a=makeSequence(s);return a.__iterateUncached=function(a,u){var _=0,w=!1;function flatDeep(s,x){var C=this;s.__iterate((function(s,u){return(!o||x0}function zipWithFactory(s,o,i){var a=makeSequence(s);return a.size=new ArraySeq(i).map((function(s){return s.size})).min(),a.__iterate=function(s,o){for(var i,a=this.__iterator(U,o),u=0;!(i=a.next()).done&&!1!==s(i.value,u++,this););return u},a.__iteratorUncached=function(s,a){var u=i.map((function(s){return s=Iterable(s),getIterator(a?s.reverse():s)})),_=0,w=!1;return new Iterator((function(){var i;return w||(i=u.map((function(s){return s.next()})),w=i.some((function(s){return s.done}))),w?iteratorDone():iteratorValue(s,_++,o.apply(null,i.map((function(s){return s.value}))))}))},a}function reify(s,o){return isSeq(s)?o:s.constructor(o)}function validateEntry(s){if(s!==Object(s))throw new TypeError("Expected [K, V] tuple: "+s)}function resolveSize(s){return assertNotInfinite(s.size),ensureSize(s)}function iterableClass(s){return isKeyed(s)?KeyedIterable:isIndexed(s)?IndexedIterable:SetIterable}function makeSequence(s){return Object.create((isKeyed(s)?KeyedSeq:isIndexed(s)?IndexedSeq:SetSeq).prototype)}function cacheResultThrough(){return 
this._iter.cacheResult?(this._iter.cacheResult(),this.size=this._iter.size,this):Seq.prototype.cacheResult.call(this)}function defaultComparator(s,o){return s>o?1:s=0;i--)o={value:arguments[i],next:o};return this.__ownerID?(this.size=s,this._head=o,this.__hash=void 0,this.__altered=!0,this):makeStack(s,o)},Stack.prototype.pushAll=function(s){if(0===(s=IndexedIterable(s)).size)return this;assertNotInfinite(s.size);var o=this.size,i=this._head;return s.reverse().forEach((function(s){o++,i={value:s,next:i}})),this.__ownerID?(this.size=o,this._head=i,this.__hash=void 0,this.__altered=!0,this):makeStack(o,i)},Stack.prototype.pop=function(){return this.slice(1)},Stack.prototype.unshift=function(){return this.push.apply(this,arguments)},Stack.prototype.unshiftAll=function(s){return this.pushAll(s)},Stack.prototype.shift=function(){return this.pop.apply(this,arguments)},Stack.prototype.clear=function(){return 0===this.size?this:this.__ownerID?(this.size=0,this._head=void 0,this.__hash=void 0,this.__altered=!0,this):emptyStack()},Stack.prototype.slice=function(s,o){if(wholeSlice(s,o,this.size))return this;var i=resolveBegin(s,this.size);if(resolveEnd(o,this.size)!==this.size)return IndexedCollection.prototype.slice.call(this,s,o);for(var a=this.size-i,u=this._head;i--;)u=u.next;return this.__ownerID?(this.size=a,this._head=u,this.__hash=void 0,this.__altered=!0,this):makeStack(a,u)},Stack.prototype.__ensureOwner=function(s){return s===this.__ownerID?this:s?makeStack(this.size,this._head,s,this.__hash):(this.__ownerID=s,this.__altered=!1,this)},Stack.prototype.__iterate=function(s,o){if(o)return this.reverse().__iterate(s);for(var i=0,a=this._head;a&&!1!==s(a.value,i++,this);)a=a.next;return i},Stack.prototype.__iterator=function(s,o){if(o)return this.reverse().__iterator(s);var i=0,a=this._head;return new Iterator((function(){if(a){var o=a.value;return a=a.next,iteratorValue(s,i++,o)}return iteratorDone()}))},Stack.isStack=isStack;var 
at,ct="@@__IMMUTABLE_STACK__@@",lt=Stack.prototype;function makeStack(s,o,i,a){var u=Object.create(lt);return u.size=s,u._head=o,u.__ownerID=i,u.__hash=a,u.__altered=!1,u}function emptyStack(){return at||(at=makeStack(0))}function mixin(s,o){var keyCopier=function(i){s.prototype[i]=o[i]};return Object.keys(o).forEach(keyCopier),Object.getOwnPropertySymbols&&Object.getOwnPropertySymbols(o).forEach(keyCopier),s}lt[ct]=!0,lt.withMutations=$e.withMutations,lt.asMutable=$e.asMutable,lt.asImmutable=$e.asImmutable,lt.wasAltered=$e.wasAltered,Iterable.Iterator=Iterator,mixin(Iterable,{toArray:function(){assertNotInfinite(this.size);var s=new Array(this.size||0);return this.valueSeq().__iterate((function(o,i){s[i]=o})),s},toIndexedSeq:function(){return new ToIndexedSequence(this)},toJS:function(){return this.toSeq().map((function(s){return s&&"function"==typeof s.toJS?s.toJS():s})).__toJS()},toJSON:function(){return this.toSeq().map((function(s){return s&&"function"==typeof s.toJSON?s.toJSON():s})).__toJS()},toKeyedSeq:function(){return new ToKeyedSequence(this,!0)},toMap:function(){return Map(this.toKeyedSeq())},toObject:function(){assertNotInfinite(this.size);var s={};return this.__iterate((function(o,i){s[i]=o})),s},toOrderedMap:function(){return OrderedMap(this.toKeyedSeq())},toOrderedSet:function(){return OrderedSet(isKeyed(this)?this.valueSeq():this)},toSet:function(){return Set(isKeyed(this)?this.valueSeq():this)},toSetSeq:function(){return new ToSetSequence(this)},toSeq:function(){return isIndexed(this)?this.toIndexedSeq():isKeyed(this)?this.toKeyedSeq():this.toSetSeq()},toStack:function(){return Stack(isKeyed(this)?this.valueSeq():this)},toList:function(){return List(isKeyed(this)?this.valueSeq():this)},toString:function(){return"[Iterable]"},__toString:function(s,o){return 0===this.size?s+o:s+" "+this.toSeq().map(this.__toStringMapper).join(", ")+" "+o},concat:function(){return reify(this,concatFactory(this,s.call(arguments,0)))},includes:function(s){return 
this.some((function(o){return is(o,s)}))},entries:function(){return this.__iterator(V)},every:function(s,o){assertNotInfinite(this.size);var i=!0;return this.__iterate((function(a,u,_){if(!s.call(o,a,u,_))return i=!1,!1})),i},filter:function(s,o){return reify(this,filterFactory(this,s,o,!0))},find:function(s,o,i){var a=this.findEntry(s,o);return a?a[1]:i},forEach:function(s,o){return assertNotInfinite(this.size),this.__iterate(o?s.bind(o):s)},join:function(s){assertNotInfinite(this.size),s=void 0!==s?""+s:",";var o="",i=!0;return this.__iterate((function(a){i?i=!1:o+=s,o+=null!=a?a.toString():""})),o},keys:function(){return this.__iterator($)},map:function(s,o){return reify(this,mapFactory(this,s,o))},reduce:function(s,o,i){var a,u;return assertNotInfinite(this.size),arguments.length<2?u=!0:a=o,this.__iterate((function(o,_,w){u?(u=!1,a=o):a=s.call(i,a,o,_,w)})),a},reduceRight:function(s,o,i){var a=this.toKeyedSeq().reverse();return a.reduce.apply(a,arguments)},reverse:function(){return reify(this,reverseFactory(this,!0))},slice:function(s,o){return reify(this,sliceFactory(this,s,o,!0))},some:function(s,o){return!this.every(not(s),o)},sort:function(s){return reify(this,sortFactory(this,s))},values:function(){return this.__iterator(U)},butLast:function(){return this.slice(0,-1)},isEmpty:function(){return void 0!==this.size?0===this.size:!this.some((function(){return!0}))},count:function(s,o){return ensureSize(s?this.toSeq().filter(s,o):this)},countBy:function(s,o){return countByFactory(this,s,o)},equals:function(s){return deepEqual(this,s)},entrySeq:function(){var s=this;if(s._cache)return new ArraySeq(s._cache);var o=s.toSeq().map(entryMapper).toIndexedSeq();return o.fromEntrySeq=function(){return s.toSeq()},o},filterNot:function(s,o){return this.filter(not(s),o)},findEntry:function(s,o,i){var a=i;return this.__iterate((function(i,u,_){if(s.call(o,i,u,_))return a=[u,i],!1})),a},findKey:function(s,o){var i=this.findEntry(s,o);return 
i&&i[0]},findLast:function(s,o,i){return this.toKeyedSeq().reverse().find(s,o,i)},findLastEntry:function(s,o,i){return this.toKeyedSeq().reverse().findEntry(s,o,i)},findLastKey:function(s,o){return this.toKeyedSeq().reverse().findKey(s,o)},first:function(){return this.find(returnTrue)},flatMap:function(s,o){return reify(this,flatMapFactory(this,s,o))},flatten:function(s){return reify(this,flattenFactory(this,s,!0))},fromEntrySeq:function(){return new FromEntriesSequence(this)},get:function(s,o){return this.find((function(o,i){return is(i,s)}),void 0,o)},getIn:function(s,o){for(var i,a=this,u=forceIterator(s);!(i=u.next()).done;){var _=i.value;if((a=a&&a.get?a.get(_,j):j)===j)return o}return a},groupBy:function(s,o){return groupByFactory(this,s,o)},has:function(s){return this.get(s,j)!==j},hasIn:function(s){return this.getIn(s,j)!==j},isSubset:function(s){return s="function"==typeof s.includes?s:Iterable(s),this.every((function(o){return s.includes(o)}))},isSuperset:function(s){return(s="function"==typeof s.isSubset?s:Iterable(s)).isSubset(this)},keyOf:function(s){return this.findKey((function(o){return is(o,s)}))},keySeq:function(){return this.toSeq().map(keyMapper).toIndexedSeq()},last:function(){return this.toSeq().reverse().first()},lastKeyOf:function(s){return this.toKeyedSeq().reverse().keyOf(s)},max:function(s){return maxFactory(this,s)},maxBy:function(s,o){return maxFactory(this,o,s)},min:function(s){return maxFactory(this,s?neg(s):defaultNegComparator)},minBy:function(s,o){return maxFactory(this,o?neg(o):defaultNegComparator,s)},rest:function(){return this.slice(1)},skip:function(s){return this.slice(Math.max(0,s))},skipLast:function(s){return reify(this,this.toSeq().reverse().skip(s).reverse())},skipWhile:function(s,o){return reify(this,skipWhileFactory(this,s,o,!0))},skipUntil:function(s,o){return this.skipWhile(not(s),o)},sortBy:function(s,o){return reify(this,sortFactory(this,o,s))},take:function(s){return 
this.slice(0,Math.max(0,s))},takeLast:function(s){return reify(this,this.toSeq().reverse().take(s).reverse())},takeWhile:function(s,o){return reify(this,takeWhileFactory(this,s,o))},takeUntil:function(s,o){return this.takeWhile(not(s),o)},valueSeq:function(){return this.toIndexedSeq()},hashCode:function(){return this.__hash||(this.__hash=hashIterable(this))}});var ut=Iterable.prototype;ut[o]=!0,ut[Z]=ut.values,ut.__toJS=ut.toArray,ut.__toStringMapper=quoteString,ut.inspect=ut.toSource=function(){return this.toString()},ut.chain=ut.flatMap,ut.contains=ut.includes,mixin(KeyedIterable,{flip:function(){return reify(this,flipFactory(this))},mapEntries:function(s,o){var i=this,a=0;return reify(this,this.toSeq().map((function(u,_){return s.call(o,[_,u],a++,i)})).fromEntrySeq())},mapKeys:function(s,o){var i=this;return reify(this,this.toSeq().flip().map((function(a,u){return s.call(o,a,u,i)})).flip())}});var pt=KeyedIterable.prototype;function keyMapper(s,o){return o}function entryMapper(s,o){return[o,s]}function not(s){return function(){return!s.apply(this,arguments)}}function neg(s){return function(){return-s.apply(this,arguments)}}function quoteString(s){return"string"==typeof s?JSON.stringify(s):String(s)}function defaultZipper(){return arrCopy(arguments)}function defaultNegComparator(s,o){return so?-1:0}function hashIterable(s){if(s.size===1/0)return 0;var o=isOrdered(s),i=isKeyed(s),a=o?1:0;return murmurHashOfSize(s.__iterate(i?o?function(s,o){a=31*a+hashMerge(hash(s),hash(o))|0}:function(s,o){a=a+hashMerge(hash(s),hash(o))|0}:o?function(s){a=31*a+hash(s)|0}:function(s){a=a+hash(s)|0}),a)}function murmurHashOfSize(s,o){return o=le(o,3432918353),o=le(o<<15|o>>>-15,461845907),o=le(o<<13|o>>>-13,5),o=le((o=o+3864292196^s)^o>>>16,2246822507),o=smi((o=le(o^o>>>13,3266489909))^o>>>16)}function hashMerge(s,o){return s^o+2654435769+(s<<6)+(s>>2)}return pt[i]=!0,pt[Z]=ut.entries,pt.__toJS=ut.toObject,pt.__toStringMapper=function(s,o){return JSON.stringify(o)+": 
"+quoteString(s)},mixin(IndexedIterable,{toKeyedSeq:function(){return new ToKeyedSequence(this,!1)},filter:function(s,o){return reify(this,filterFactory(this,s,o,!1))},findIndex:function(s,o){var i=this.findEntry(s,o);return i?i[0]:-1},indexOf:function(s){var o=this.keyOf(s);return void 0===o?-1:o},lastIndexOf:function(s){var o=this.lastKeyOf(s);return void 0===o?-1:o},reverse:function(){return reify(this,reverseFactory(this,!1))},slice:function(s,o){return reify(this,sliceFactory(this,s,o,!1))},splice:function(s,o){var i=arguments.length;if(o=Math.max(0|o,0),0===i||2===i&&!o)return this;s=resolveBegin(s,s<0?this.count():this.size);var a=this.slice(0,s);return reify(this,1===i?a:a.concat(arrCopy(arguments,2),this.slice(s+o)))},findLastIndex:function(s,o){var i=this.findLastEntry(s,o);return i?i[0]:-1},first:function(){return this.get(0)},flatten:function(s){return reify(this,flattenFactory(this,s,!1))},get:function(s,o){return(s=wrapIndex(this,s))<0||this.size===1/0||void 0!==this.size&&s>this.size?o:this.find((function(o,i){return i===s}),void 0,o)},has:function(s){return(s=wrapIndex(this,s))>=0&&(void 0!==this.size?this.size===1/0||s{"use strict";i(71340);var a=i(92046);s.exports=a.Object.assign},9957:(s,o,i)=>{"use strict";var a=Function.prototype.call,u=Object.prototype.hasOwnProperty,_=i(66743);s.exports=_.call(a,u)},9999:(s,o,i)=>{var a=i(37217),u=i(83729),_=i(16547),w=i(74733),x=i(43838),C=i(93290),j=i(23007),L=i(92271),B=i(48948),$=i(50002),U=i(83349),V=i(5861),z=i(76189),Y=i(77199),Z=i(35529),ee=i(56449),ie=i(3656),ae=i(87730),ce=i(23805),le=i(38440),pe=i(95950),de=i(37241),fe="[object Arguments]",ye="[object Function]",be="[object Object]",_e={};_e[fe]=_e["[object Array]"]=_e["[object ArrayBuffer]"]=_e["[object DataView]"]=_e["[object Boolean]"]=_e["[object Date]"]=_e["[object Float32Array]"]=_e["[object Float64Array]"]=_e["[object Int8Array]"]=_e["[object Int16Array]"]=_e["[object Int32Array]"]=_e["[object Map]"]=_e["[object Number]"]=_e[be]=_e["[object 
RegExp]"]=_e["[object Set]"]=_e["[object String]"]=_e["[object Symbol]"]=_e["[object Uint8Array]"]=_e["[object Uint8ClampedArray]"]=_e["[object Uint16Array]"]=_e["[object Uint32Array]"]=!0,_e["[object Error]"]=_e[ye]=_e["[object WeakMap]"]=!1,s.exports=function baseClone(s,o,i,Se,we,xe){var Pe,Te=1&o,Re=2&o,$e=4&o;if(i&&(Pe=we?i(s,Se,we,xe):i(s)),void 0!==Pe)return Pe;if(!ce(s))return s;var qe=ee(s);if(qe){if(Pe=z(s),!Te)return j(s,Pe)}else{var ze=V(s),We=ze==ye||"[object GeneratorFunction]"==ze;if(ie(s))return C(s,Te);if(ze==be||ze==fe||We&&!we){if(Pe=Re||We?{}:Z(s),!Te)return Re?B(s,x(Pe,s)):L(s,w(Pe,s))}else{if(!_e[ze])return we?s:{};Pe=Y(s,ze,Te)}}xe||(xe=new a);var He=xe.get(s);if(He)return He;xe.set(s,Pe),le(s)?s.forEach((function(a){Pe.add(baseClone(a,o,i,a,s,xe))})):ae(s)&&s.forEach((function(a,u){Pe.set(u,baseClone(a,o,i,u,s,xe))}));var Ye=qe?void 0:($e?Re?U:$:Re?de:pe)(s);return u(Ye||s,(function(a,u){Ye&&(a=s[u=a]),_(Pe,u,baseClone(a,o,i,u,s,xe))})),Pe}},10023:(s,o,i)=>{const 
a=i(6205),INTS=()=>[{type:a.RANGE,from:48,to:57}],WORDS=()=>[{type:a.CHAR,value:95},{type:a.RANGE,from:97,to:122},{type:a.RANGE,from:65,to:90}].concat(INTS()),WHITESPACE=()=>[{type:a.CHAR,value:9},{type:a.CHAR,value:10},{type:a.CHAR,value:11},{type:a.CHAR,value:12},{type:a.CHAR,value:13},{type:a.CHAR,value:32},{type:a.CHAR,value:160},{type:a.CHAR,value:5760},{type:a.RANGE,from:8192,to:8202},{type:a.CHAR,value:8232},{type:a.CHAR,value:8233},{type:a.CHAR,value:8239},{type:a.CHAR,value:8287},{type:a.CHAR,value:12288},{type:a.CHAR,value:65279}];o.words=()=>({type:a.SET,set:WORDS(),not:!1}),o.notWords=()=>({type:a.SET,set:WORDS(),not:!0}),o.ints=()=>({type:a.SET,set:INTS(),not:!1}),o.notInts=()=>({type:a.SET,set:INTS(),not:!0}),o.whitespace=()=>({type:a.SET,set:WHITESPACE(),not:!1}),o.notWhitespace=()=>({type:a.SET,set:WHITESPACE(),not:!0}),o.anyChar=()=>({type:a.SET,set:[{type:a.CHAR,value:10},{type:a.CHAR,value:13},{type:a.CHAR,value:8232},{type:a.CHAR,value:8233}],not:!0})},10043:(s,o,i)=>{"use strict";var a=i(54018),u=String,_=TypeError;s.exports=function(s){if(a(s))return s;throw new _("Can't set "+u(s)+" as a prototype")}},10076:s=>{"use strict";s.exports=Function.prototype.call},10124:(s,o,i)=>{var a=i(9325);s.exports=function(){return a.Date.now()}},10300:(s,o,i)=>{"use strict";var a=i(13930),u=i(82159),_=i(36624),w=i(4640),x=i(73448),C=TypeError;s.exports=function(s,o){var i=arguments.length<2?x(s):o;if(u(i))return _(a(i,s));throw new C(w(s)+" is not iterable")}},10316:(s,o,i)=>{const a=i(2404),u=i(55973),_=i(92340);class Element{constructor(s,o,i){o&&(this.meta=o),i&&(this.attributes=i),this.content=s}freeze(){Object.isFrozen(this)||(this._meta&&(this.meta.parent=this,this.meta.freeze()),this._attributes&&(this.attributes.parent=this,this.attributes.freeze()),this.children.forEach((s=>{s.parent=this,s.freeze()}),this),this.content&&Array.isArray(this.content)&&Object.freeze(this.content),Object.freeze(this))}primitive(){}clone(){const s=new 
this.constructor;return s.element=this.element,this.meta.length&&(s._meta=this.meta.clone()),this.attributes.length&&(s._attributes=this.attributes.clone()),this.content?this.content.clone?s.content=this.content.clone():Array.isArray(this.content)?s.content=this.content.map((s=>s.clone())):s.content=this.content:s.content=this.content,s}toValue(){return this.content instanceof Element?this.content.toValue():this.content instanceof u?{key:this.content.key.toValue(),value:this.content.value?this.content.value.toValue():void 0}:this.content&&this.content.map?this.content.map((s=>s.toValue()),this):this.content}toRef(s){if(""===this.id.toValue())throw Error("Cannot create reference to an element that does not contain an ID");const o=new this.RefElement(this.id.toValue());return s&&(o.path=s),o}findRecursive(...s){if(arguments.length>1&&!this.isFrozen)throw new Error("Cannot find recursive with multiple element names without first freezing the element. Call `element.freeze()`");const o=s.pop();let i=new _;const append=(s,o)=>(s.push(o),s),checkElement=(s,i)=>{i.element===o&&s.push(i);const a=i.findRecursive(o);return a&&a.reduce(append,s),i.content instanceof u&&(i.content.key&&checkElement(s,i.content.key),i.content.value&&checkElement(s,i.content.value)),s};return this.content&&(this.content.element&&checkElement(i,this.content),Array.isArray(this.content)&&this.content.reduce(checkElement,i)),s.isEmpty||(i=i.filter((o=>{let i=o.parents.map((s=>s.element));for(const o in s){const a=s[o],u=i.indexOf(a);if(-1===u)return!1;i=i.splice(0,u)}return!0}))),i}set(s){return this.content=s,this}equals(s){return a(this.toValue(),s)}getMetaProperty(s,o){if(!this.meta.hasKey(s)){if(this.isFrozen){const s=this.refract(o);return s.freeze(),s}this.meta.set(s,o)}return this.meta.get(s)}setMetaProperty(s,o){this.meta.set(s,o)}get element(){return this._storedElement||"element"}set element(s){this._storedElement=s}get content(){return this._content}set content(s){if(s instanceof 
Element)this._content=s;else if(s instanceof _)this.content=s.elements;else if("string"==typeof s||"number"==typeof s||"boolean"==typeof s||"null"===s||null==s)this._content=s;else if(s instanceof u)this._content=s;else if(Array.isArray(s))this._content=s.map(this.refract);else{if("object"!=typeof s)throw new Error("Cannot set content to given value");this._content=Object.keys(s).map((o=>new this.MemberElement(o,s[o])))}}get meta(){if(!this._meta){if(this.isFrozen){const s=new this.ObjectElement;return s.freeze(),s}this._meta=new this.ObjectElement}return this._meta}set meta(s){s instanceof this.ObjectElement?this._meta=s:this.meta.set(s||{})}get attributes(){if(!this._attributes){if(this.isFrozen){const s=new this.ObjectElement;return s.freeze(),s}this._attributes=new this.ObjectElement}return this._attributes}set attributes(s){s instanceof this.ObjectElement?this._attributes=s:this.attributes.set(s||{})}get id(){return this.getMetaProperty("id","")}set id(s){this.setMetaProperty("id",s)}get classes(){return this.getMetaProperty("classes",[])}set classes(s){this.setMetaProperty("classes",s)}get title(){return this.getMetaProperty("title","")}set title(s){this.setMetaProperty("title",s)}get description(){return this.getMetaProperty("description","")}set description(s){this.setMetaProperty("description",s)}get links(){return this.getMetaProperty("links",[])}set links(s){this.setMetaProperty("links",s)}get isFrozen(){return Object.isFrozen(this)}get parents(){let{parent:s}=this;const o=new _;for(;s;)o.push(s),s=s.parent;return o}get children(){if(Array.isArray(this.content))return new _(this.content);if(this.content instanceof u){const s=new _([this.content.key]);return this.content.value&&s.push(this.content.value),s}return this.content instanceof Element?new _([this.content]):new _}get recursiveChildren(){const s=new _;return this.children.forEach((o=>{s.push(o),o.recursiveChildren.forEach((o=>{s.push(o)}))})),s}}s.exports=Element},10392:s=>{s.exports=function 
getValue(s,o){return null==s?void 0:s[o]}},10487:(s,o,i)=>{"use strict";var a=i(96897),u=i(30655),_=i(73126),w=i(12205);s.exports=function callBind(s){var o=_(arguments),i=s.length-(arguments.length-1);return a(o,1+(i>0?i:0),!0)},u?u(s.exports,"apply",{value:w}):s.exports.apply=w},10776:(s,o,i)=>{var a=i(30756),u=i(95950);s.exports=function getMatchData(s){for(var o=u(s),i=o.length;i--;){var _=o[i],w=s[_];o[i]=[_,w,a(w)]}return o}},10866:(s,o,i)=>{const a=i(6048),u=i(92340);class ObjectSlice extends u{map(s,o){return this.elements.map((i=>s.bind(o)(i.value,i.key,i)))}filter(s,o){return new ObjectSlice(this.elements.filter((i=>s.bind(o)(i.value,i.key,i))))}reject(s,o){return this.filter(a(s.bind(o)))}forEach(s,o){return this.elements.forEach(((i,a)=>{s.bind(o)(i.value,i.key,i,a)}))}keys(){return this.map(((s,o)=>o.toValue()))}values(){return this.map((s=>s.toValue()))}}s.exports=ObjectSlice},11002:s=>{"use strict";s.exports=Function.prototype.apply},11042:(s,o,i)=>{"use strict";var a=i(85582),u=i(1907),_=i(24443),w=i(87170),x=i(36624),C=u([].concat);s.exports=a("Reflect","ownKeys")||function ownKeys(s){var o=_.f(x(s)),i=w.f;return i?C(o,i(s)):o}},11091:(s,o,i)=>{"use strict";var a=i(45951),u=i(76024),_=i(92361),w=i(62250),x=i(13846).f,C=i(7463),j=i(92046),L=i(28311),B=i(61626),$=i(49724);i(36128);var wrapConstructor=function(s){var Wrapper=function(o,i,a){if(this instanceof Wrapper){switch(arguments.length){case 0:return new s;case 1:return new s(o);case 2:return new s(o,i)}return new s(o,i,a)}return u(s,this,arguments)};return Wrapper.prototype=s.prototype,Wrapper};s.exports=function(s,o){var i,u,U,V,z,Y,Z,ee,ie,ae=s.target,ce=s.global,le=s.stat,pe=s.proto,de=ce?a:le?a[ae]:a[ae]&&a[ae].prototype,fe=ce?j:j[ae]||B(j,ae,{})[ae],ye=fe.prototype;for(V in o)u=!(i=C(ce?V:ae+(le?".":"#")+V,s.forced))&&de&&$(de,V),Y=fe[V],u&&(Z=s.dontCallGetSet?(ie=x(de,V))&&ie.value:de[V]),z=u&&Z?Z:o[V],(i||pe||typeof Y!=typeof 
z)&&(ee=s.bind&&u?L(z,a):s.wrap&&u?wrapConstructor(z):pe&&w(z)?_(z):z,(s.sham||z&&z.sham||Y&&Y.sham)&&B(ee,"sham",!0),B(fe,V,ee),pe&&($(j,U=ae+"Prototype")||B(j,U,{}),B(j[U],V,z),s.real&&ye&&(i||!ye[V])&&B(ye,V,z)))}},11287:s=>{s.exports=function getHolder(s){return s.placeholder}},11331:(s,o,i)=>{var a=i(72552),u=i(28879),_=i(40346),w=Function.prototype,x=Object.prototype,C=w.toString,j=x.hasOwnProperty,L=C.call(Object);s.exports=function isPlainObject(s){if(!_(s)||"[object Object]"!=a(s))return!1;var o=u(s);if(null===o)return!0;var i=j.call(o,"constructor")&&o.constructor;return"function"==typeof i&&i instanceof i&&C.call(i)==L}},11470:(s,o,i)=>{"use strict";var a=i(1907),u=i(65482),_=i(90160),w=i(74239),x=a("".charAt),C=a("".charCodeAt),j=a("".slice),createMethod=function(s){return function(o,i){var a,L,B=_(w(o)),$=u(i),U=B.length;return $<0||$>=U?s?"":void 0:(a=C(B,$))<55296||a>56319||$+1===U||(L=C(B,$+1))<56320||L>57343?s?x(B,$):a:s?j(B,$,$+2):L-56320+(a-55296<<10)+65536}};s.exports={codeAt:createMethod(!1),charAt:createMethod(!0)}},11842:(s,o,i)=>{var a=i(82819),u=i(9325);s.exports=function createBind(s,o,i){var _=1&o,w=a(s);return function wrapper(){return(this&&this!==u&&this instanceof wrapper?w:s).apply(_?i:this,arguments)}}},12205:(s,o,i)=>{"use strict";var a=i(66743),u=i(11002),_=i(13144);s.exports=function applyBind(){return _(a,u,arguments)}},12242:(s,o,i)=>{const a=i(10316);s.exports=class BooleanElement extends a{constructor(s,o,i){super(s,o,i),this.element="boolean"}primitive(){return"boolean"}}},12507:(s,o,i)=>{var a=i(28754),u=i(49698),_=i(63912),w=i(13222);s.exports=function createCaseFirst(s){return function(o){o=w(o);var i=u(o)?_(o):void 0,x=i?i[0]:o.charAt(0),C=i?a(i,1).join(""):o.slice(1);return x[s]()+C}}},12560:(s,o,i)=>{"use strict";i(99363);var a=i(19287),u=i(45951),_=i(14840),w=i(93742);for(var x in a)_(u[x],x),w[x]=w.Array},12651:(s,o,i)=>{var a=i(74218);s.exports=function getMapData(s,o){var i=s.__data__;return a(o)?i["string"==typeof 
o?"string":"hash"]:i.map}},12749:(s,o,i)=>{var a=i(81042),u=Object.prototype.hasOwnProperty;s.exports=function hashHas(s){var o=this.__data__;return a?void 0!==o[s]:u.call(o,s)}},13144:(s,o,i)=>{"use strict";var a=i(66743),u=i(11002),_=i(10076),w=i(47119);s.exports=w||a.call(_,u)},13222:(s,o,i)=>{var a=i(77556);s.exports=function toString(s){return null==s?"":a(s)}},13846:(s,o,i)=>{"use strict";var a=i(39447),u=i(13930),_=i(22574),w=i(75817),x=i(4993),C=i(70470),j=i(49724),L=i(73648),B=Object.getOwnPropertyDescriptor;o.f=a?B:function getOwnPropertyDescriptor(s,o){if(s=x(s),o=C(o),L)try{return B(s,o)}catch(s){}if(j(s,o))return w(!u(_.f,s,o),s[o])}},13930:(s,o,i)=>{"use strict";var a=i(41505),u=Function.prototype.call;s.exports=a?u.bind(u):function(){return u.apply(u,arguments)}},14248:s=>{s.exports=function arraySome(s,o){for(var i=-1,a=null==s?0:s.length;++i{s.exports=function arrayPush(s,o){for(var i=-1,a=o.length,u=s.length;++i{const a=i(10316);s.exports=class RefElement extends a{constructor(s,o,i){super(s||[],o,i),this.element="ref",this.path||(this.path="element")}get path(){return this.attributes.get("path")}set path(s){this.attributes.set("path",s)}}},14744:s=>{"use strict";var o=function isMergeableObject(s){return function isNonNullObject(s){return!!s&&"object"==typeof s}(s)&&!function isSpecial(s){var o=Object.prototype.toString.call(s);return"[object RegExp]"===o||"[object Date]"===o||function isReactElement(s){return s.$$typeof===i}(s)}(s)};var i="function"==typeof Symbol&&Symbol.for?Symbol.for("react.element"):60103;function cloneUnlessOtherwiseSpecified(s,o){return!1!==o.clone&&o.isMergeableObject(s)?deepmerge(function emptyTarget(s){return Array.isArray(s)?[]:{}}(s),s,o):s}function defaultArrayMerge(s,o,i){return s.concat(o).map((function(s){return cloneUnlessOtherwiseSpecified(s,i)}))}function getKeys(s){return Object.keys(s).concat(function getEnumerableOwnPropertySymbols(s){return 
Object.getOwnPropertySymbols?Object.getOwnPropertySymbols(s).filter((function(o){return Object.propertyIsEnumerable.call(s,o)})):[]}(s))}function propertyIsOnObject(s,o){try{return o in s}catch(s){return!1}}function mergeObject(s,o,i){var a={};return i.isMergeableObject(s)&&getKeys(s).forEach((function(o){a[o]=cloneUnlessOtherwiseSpecified(s[o],i)})),getKeys(o).forEach((function(u){(function propertyIsUnsafe(s,o){return propertyIsOnObject(s,o)&&!(Object.hasOwnProperty.call(s,o)&&Object.propertyIsEnumerable.call(s,o))})(s,u)||(propertyIsOnObject(s,u)&&i.isMergeableObject(o[u])?a[u]=function getMergeFunction(s,o){if(!o.customMerge)return deepmerge;var i=o.customMerge(s);return"function"==typeof i?i:deepmerge}(u,i)(s[u],o[u],i):a[u]=cloneUnlessOtherwiseSpecified(o[u],i))})),a}function deepmerge(s,i,a){(a=a||{}).arrayMerge=a.arrayMerge||defaultArrayMerge,a.isMergeableObject=a.isMergeableObject||o,a.cloneUnlessOtherwiseSpecified=cloneUnlessOtherwiseSpecified;var u=Array.isArray(i);return u===Array.isArray(s)?u?a.arrayMerge(s,i,a):mergeObject(s,i,a):cloneUnlessOtherwiseSpecified(i,a)}deepmerge.all=function deepmergeAll(s,o){if(!Array.isArray(s))throw new Error("first argument should be an array");return s.reduce((function(s,i){return deepmerge(s,i,o)}),{})};var a=deepmerge;s.exports=a},14792:(s,o,i)=>{var a=i(13222),u=i(55808);s.exports=function capitalize(s){return u(a(s).toLowerCase())}},14840:(s,o,i)=>{"use strict";var a=i(52623),u=i(74284).f,_=i(61626),w=i(49724),x=i(54878),C=i(76264)("toStringTag");s.exports=function(s,o,i,j){var L=i?s:s&&s.prototype;L&&(w(L,C)||u(L,C,{configurable:!0,value:o}),j&&!a&&_(L,"toString",x))}},14974:s=>{s.exports=function safeGet(s,o){if(("constructor"!==o||"function"!=typeof s[o])&&"__proto__"!=o)return s[o]}},15287:(s,o)=>{"use strict";var 
i=Symbol.for("react.element"),a=Symbol.for("react.portal"),u=Symbol.for("react.fragment"),_=Symbol.for("react.strict_mode"),w=Symbol.for("react.profiler"),x=Symbol.for("react.provider"),C=Symbol.for("react.context"),j=Symbol.for("react.forward_ref"),L=Symbol.for("react.suspense"),B=Symbol.for("react.memo"),$=Symbol.for("react.lazy"),U=Symbol.iterator;var V={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},z=Object.assign,Y={};function E(s,o,i){this.props=s,this.context=o,this.refs=Y,this.updater=i||V}function F(){}function G(s,o,i){this.props=s,this.context=o,this.refs=Y,this.updater=i||V}E.prototype.isReactComponent={},E.prototype.setState=function(s,o){if("object"!=typeof s&&"function"!=typeof s&&null!=s)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,s,o,"setState")},E.prototype.forceUpdate=function(s){this.updater.enqueueForceUpdate(this,s,"forceUpdate")},F.prototype=E.prototype;var Z=G.prototype=new F;Z.constructor=G,z(Z,E.prototype),Z.isPureReactComponent=!0;var ee=Array.isArray,ie=Object.prototype.hasOwnProperty,ae={current:null},ce={key:!0,ref:!0,__self:!0,__source:!0};function M(s,o,a){var u,_={},w=null,x=null;if(null!=o)for(u in void 0!==o.ref&&(x=o.ref),void 0!==o.key&&(w=""+o.key),o)ie.call(o,u)&&!ce.hasOwnProperty(u)&&(_[u]=o[u]);var C=arguments.length-2;if(1===C)_.children=a;else if(1{var a=i(96131);s.exports=function arrayIncludes(s,o){return!!(null==s?0:s.length)&&a(s,o,0)>-1}},15340:()=>{},15377:(s,o,i)=>{"use strict";var a=i(92861).Buffer,u=i(64634),_=i(74372),w=ArrayBuffer.isView||function isView(s){try{return _(s),!0}catch(s){return!1}},x="undefined"!=typeof Uint8Array,C="undefined"!=typeof ArrayBuffer&&"undefined"!=typeof Uint8Array,j=C&&(a.prototype instanceof Uint8Array||a.TYPED_ARRAY_SUPPORT);s.exports=function toBuffer(s,o){if(s instanceof a)return 
s;if("string"==typeof s)return a.from(s,o);if(C&&w(s)){if(0===s.byteLength)return a.alloc(0);if(j){var i=a.from(s.buffer,s.byteOffset,s.byteLength);if(i.byteLength===s.byteLength)return i}var _=s instanceof Uint8Array?s:new Uint8Array(s.buffer,s.byteOffset,s.byteLength),L=a.from(_);if(L.length===s.byteLength)return L}if(x&&s instanceof Uint8Array)return a.from(s);var B=u(s);if(B)for(var $=0;$255||~~U!==U)throw new RangeError("Array items must be numbers in the range 0-255.")}if(B||a.isBuffer(s)&&s.constructor&&"function"==typeof s.constructor.isBuffer&&s.constructor.isBuffer(s))return a.from(s);throw new TypeError('The "data" argument must be a string, an Array, a Buffer, a Uint8Array, or a DataView.')}},15389:(s,o,i)=>{var a=i(93663),u=i(87978),_=i(83488),w=i(56449),x=i(50583);s.exports=function baseIteratee(s){return"function"==typeof s?s:null==s?_:"object"==typeof s?w(s)?u(s[0],s[1]):a(s):x(s)}},15972:(s,o,i)=>{"use strict";var a=i(49724),u=i(62250),_=i(39298),w=i(92522),x=i(57382),C=w("IE_PROTO"),j=Object,L=j.prototype;s.exports=x?j.getPrototypeOf:function(s){var o=_(s);if(a(o,C))return o[C];var i=o.constructor;return u(i)&&o instanceof i?i.prototype:o instanceof j?L:null}},16038:(s,o,i)=>{var a=i(5861),u=i(40346);s.exports=function baseIsSet(s){return u(s)&&"[object Set]"==a(s)}},16426:s=>{s.exports=function(){var s=document.getSelection();if(!s.rangeCount)return function(){};for(var o=document.activeElement,i=[],a=0;a{var a=i(43360),u=i(75288),_=Object.prototype.hasOwnProperty;s.exports=function assignValue(s,o,i){var w=s[o];_.call(s,o)&&u(w,i)&&(void 0!==i||o in s)||a(s,o,i)}},16708:(s,o,i)=>{"use strict";var a,u=i(65606);function CorkedRequest(s){var o=this;this.next=null,this.entry=null,this.finish=function(){!function onCorkedFinish(s,o,i){var a=s.entry;s.entry=null;for(;a;){var u=a.callback;o.pendingcb--,u(i),a=a.next}o.corkedRequestsFree.next=s}(o,s)}}s.exports=Writable,Writable.WritableState=WritableState;var 
_={deprecate:i(94643)},w=i(40345),x=i(48287).Buffer,C=(void 0!==i.g?i.g:"undefined"!=typeof window?window:"undefined"!=typeof self?self:{}).Uint8Array||function(){};var j,L=i(75896),B=i(65291).getHighWaterMark,$=i(86048).F,U=$.ERR_INVALID_ARG_TYPE,V=$.ERR_METHOD_NOT_IMPLEMENTED,z=$.ERR_MULTIPLE_CALLBACK,Y=$.ERR_STREAM_CANNOT_PIPE,Z=$.ERR_STREAM_DESTROYED,ee=$.ERR_STREAM_NULL_VALUES,ie=$.ERR_STREAM_WRITE_AFTER_END,ae=$.ERR_UNKNOWN_ENCODING,ce=L.errorOrDestroy;function nop(){}function WritableState(s,o,_){a=a||i(25382),s=s||{},"boolean"!=typeof _&&(_=o instanceof a),this.objectMode=!!s.objectMode,_&&(this.objectMode=this.objectMode||!!s.writableObjectMode),this.highWaterMark=B(this,s,"writableHighWaterMark",_),this.finalCalled=!1,this.needDrain=!1,this.ending=!1,this.ended=!1,this.finished=!1,this.destroyed=!1;var w=!1===s.decodeStrings;this.decodeStrings=!w,this.defaultEncoding=s.defaultEncoding||"utf8",this.length=0,this.writing=!1,this.corked=0,this.sync=!0,this.bufferProcessing=!1,this.onwrite=function(s){!function onwrite(s,o){var i=s._writableState,a=i.sync,_=i.writecb;if("function"!=typeof _)throw new z;if(function onwriteStateUpdate(s){s.writing=!1,s.writecb=null,s.length-=s.writelen,s.writelen=0}(i),o)!function onwriteError(s,o,i,a,_){--o.pendingcb,i?(u.nextTick(_,a),u.nextTick(finishMaybe,s,o),s._writableState.errorEmitted=!0,ce(s,a)):(_(a),s._writableState.errorEmitted=!0,ce(s,a),finishMaybe(s,o))}(s,i,a,o,_);else{var w=needFinish(i)||s.destroyed;w||i.corked||i.bufferProcessing||!i.bufferedRequest||clearBuffer(s,i),a?u.nextTick(afterWrite,s,i,w,_):afterWrite(s,i,w,_)}}(o,s)},this.writecb=null,this.writelen=0,this.bufferedRequest=null,this.lastBufferedRequest=null,this.pendingcb=0,this.prefinished=!1,this.errorEmitted=!1,this.emitClose=!1!==s.emitClose,this.autoDestroy=!!s.autoDestroy,this.bufferedRequestCount=0,this.corkedRequestsFree=new CorkedRequest(this)}function Writable(s){var o=this instanceof(a=a||i(25382));if(!o&&!j.call(Writable,this))return new 
Writable(s);this._writableState=new WritableState(s,this,o),this.writable=!0,s&&("function"==typeof s.write&&(this._write=s.write),"function"==typeof s.writev&&(this._writev=s.writev),"function"==typeof s.destroy&&(this._destroy=s.destroy),"function"==typeof s.final&&(this._final=s.final)),w.call(this)}function doWrite(s,o,i,a,u,_,w){o.writelen=a,o.writecb=w,o.writing=!0,o.sync=!0,o.destroyed?o.onwrite(new Z("write")):i?s._writev(u,o.onwrite):s._write(u,_,o.onwrite),o.sync=!1}function afterWrite(s,o,i,a){i||function onwriteDrain(s,o){0===o.length&&o.needDrain&&(o.needDrain=!1,s.emit("drain"))}(s,o),o.pendingcb--,a(),finishMaybe(s,o)}function clearBuffer(s,o){o.bufferProcessing=!0;var i=o.bufferedRequest;if(s._writev&&i&&i.next){var a=o.bufferedRequestCount,u=new Array(a),_=o.corkedRequestsFree;_.entry=i;for(var w=0,x=!0;i;)u[w]=i,i.isBuf||(x=!1),i=i.next,w+=1;u.allBuffers=x,doWrite(s,o,!0,o.length,u,"",_.finish),o.pendingcb++,o.lastBufferedRequest=null,_.next?(o.corkedRequestsFree=_.next,_.next=null):o.corkedRequestsFree=new CorkedRequest(o),o.bufferedRequestCount=0}else{for(;i;){var C=i.chunk,j=i.encoding,L=i.callback;if(doWrite(s,o,!1,o.objectMode?1:C.length,C,j,L),i=i.next,o.bufferedRequestCount--,o.writing)break}null===i&&(o.lastBufferedRequest=null)}o.bufferedRequest=i,o.bufferProcessing=!1}function needFinish(s){return s.ending&&0===s.length&&null===s.bufferedRequest&&!s.finished&&!s.writing}function callFinal(s,o){s._final((function(i){o.pendingcb--,i&&ce(s,i),o.prefinished=!0,s.emit("prefinish"),finishMaybe(s,o)}))}function finishMaybe(s,o){var i=needFinish(o);if(i&&(function prefinish(s,o){o.prefinished||o.finalCalled||("function"!=typeof s._final||o.destroyed?(o.prefinished=!0,s.emit("prefinish")):(o.pendingcb++,o.finalCalled=!0,u.nextTick(callFinal,s,o)))}(s,o),0===o.pendingcb&&(o.finished=!0,s.emit("finish"),o.autoDestroy))){var a=s._readableState;(!a||a.autoDestroy&&a.endEmitted)&&s.destroy()}return 
i}i(56698)(Writable,w),WritableState.prototype.getBuffer=function getBuffer(){for(var s=this.bufferedRequest,o=[];s;)o.push(s),s=s.next;return o},function(){try{Object.defineProperty(WritableState.prototype,"buffer",{get:_.deprecate((function writableStateBufferGetter(){return this.getBuffer()}),"_writableState.buffer is deprecated. Use _writableState.getBuffer instead.","DEP0003")})}catch(s){}}(),"function"==typeof Symbol&&Symbol.hasInstance&&"function"==typeof Function.prototype[Symbol.hasInstance]?(j=Function.prototype[Symbol.hasInstance],Object.defineProperty(Writable,Symbol.hasInstance,{value:function value(s){return!!j.call(this,s)||this===Writable&&(s&&s._writableState instanceof WritableState)}})):j=function realHasInstance(s){return s instanceof this},Writable.prototype.pipe=function(){ce(this,new Y)},Writable.prototype.write=function(s,o,i){var a=this._writableState,_=!1,w=!a.objectMode&&function _isUint8Array(s){return x.isBuffer(s)||s instanceof C}(s);return w&&!x.isBuffer(s)&&(s=function _uint8ArrayToBuffer(s){return x.from(s)}(s)),"function"==typeof o&&(i=o,o=null),w?o="buffer":o||(o=a.defaultEncoding),"function"!=typeof i&&(i=nop),a.ending?function writeAfterEnd(s,o){var i=new ie;ce(s,i),u.nextTick(o,i)}(this,i):(w||function validChunk(s,o,i,a){var _;return null===i?_=new ee:"string"==typeof i||o.objectMode||(_=new U("chunk",["string","Buffer"],i)),!_||(ce(s,_),u.nextTick(a,_),!1)}(this,a,s,i))&&(a.pendingcb++,_=function writeOrBuffer(s,o,i,a,u,_){if(!i){var w=function decodeChunk(s,o,i){s.objectMode||!1===s.decodeStrings||"string"!=typeof o||(o=x.from(o,i));return o}(o,a,u);a!==w&&(i=!0,u="buffer",a=w)}var C=o.objectMode?1:a.length;o.length+=C;var j=o.length-1))throw new ae(s);return this._writableState.defaultEncoding=s,this},Object.defineProperty(Writable.prototype,"writableBuffer",{enumerable:!1,get:function get(){return 
this._writableState&&this._writableState.getBuffer()}}),Object.defineProperty(Writable.prototype,"writableHighWaterMark",{enumerable:!1,get:function get(){return this._writableState.highWaterMark}}),Writable.prototype._write=function(s,o,i){i(new V("_write()"))},Writable.prototype._writev=null,Writable.prototype.end=function(s,o,i){var a=this._writableState;return"function"==typeof s?(i=s,s=null,o=null):"function"==typeof o&&(i=o,o=null),null!=s&&this.write(s,o),a.corked&&(a.corked=1,this.uncork()),a.ending||function endWritable(s,o,i){o.ending=!0,finishMaybe(s,o),i&&(o.finished?u.nextTick(i):s.once("finish",i));o.ended=!0,s.writable=!1}(this,a,i),this},Object.defineProperty(Writable.prototype,"writableLength",{enumerable:!1,get:function get(){return this._writableState.length}}),Object.defineProperty(Writable.prototype,"destroyed",{enumerable:!1,get:function get(){return void 0!==this._writableState&&this._writableState.destroyed},set:function set(s){this._writableState&&(this._writableState.destroyed=s)}}),Writable.prototype.destroy=L.destroy,Writable.prototype._undestroy=L.undestroy,Writable.prototype._destroy=function(s,o){o(s)}},16946:(s,o,i)=>{"use strict";var 
a=i(1907),u=i(98828),_=i(45807),w=Object,x=a("".split);s.exports=u((function(){return!w("z").propertyIsEnumerable(0)}))?function(s){return"String"===_(s)?x(s,""):w(s)}:w},16962:(s,o)=>{o.aliasToReal={each:"forEach",eachRight:"forEachRight",entries:"toPairs",entriesIn:"toPairsIn",extend:"assignIn",extendAll:"assignInAll",extendAllWith:"assignInAllWith",extendWith:"assignInWith",first:"head",conforms:"conformsTo",matches:"isMatch",property:"get",__:"placeholder",F:"stubFalse",T:"stubTrue",all:"every",allPass:"overEvery",always:"constant",any:"some",anyPass:"overSome",apply:"spread",assoc:"set",assocPath:"set",complement:"negate",compose:"flowRight",contains:"includes",dissoc:"unset",dissocPath:"unset",dropLast:"dropRight",dropLastWhile:"dropRightWhile",equals:"isEqual",identical:"eq",indexBy:"keyBy",init:"initial",invertObj:"invert",juxt:"over",omitAll:"omit",nAry:"ary",path:"get",pathEq:"matchesProperty",pathOr:"getOr",paths:"at",pickAll:"pick",pipe:"flow",pluck:"map",prop:"get",propEq:"matchesProperty",propOr:"getOr",props:"at",symmetricDifference:"xor",symmetricDifferenceBy:"xorBy",symmetricDifferenceWith:"xorWith",takeLast:"takeRight",takeLastWhile:"takeRightWhile",unapply:"rest",unnest:"flatten",useWith:"overArgs",where:"conformsTo",whereEq:"isMatch",zipObj:"zipObject"},o.aryMethod={1:["assignAll","assignInAll","attempt","castArray","ceil","create","curry","curryRight","defaultsAll","defaultsDeepAll","floor","flow","flowRight","fromPairs","invert","iteratee","memoize","method","mergeAll","methodOf","mixin","nthArg","over","overEvery","overSome","rest","reverse","round","runInContext","spread","template","trim","trimEnd","trimStart","uniqueId","words","zipAll"],2:["add","after","ary","assign","assignAllWith","assignIn","assignInAllWith","at","before","bind","bindAll","bindKey","chunk","cloneDeepWith","cloneWith","concat","conformsTo","countBy","curryN","curryRightN","debounce","defaults","defaultsDeep","defaultTo","delay","difference","divide","drop","dropRight","
dropRightWhile","dropWhile","endsWith","eq","every","filter","find","findIndex","findKey","findLast","findLastIndex","findLastKey","flatMap","flatMapDeep","flattenDepth","forEach","forEachRight","forIn","forInRight","forOwn","forOwnRight","get","groupBy","gt","gte","has","hasIn","includes","indexOf","intersection","invertBy","invoke","invokeMap","isEqual","isMatch","join","keyBy","lastIndexOf","lt","lte","map","mapKeys","mapValues","matchesProperty","maxBy","meanBy","merge","mergeAllWith","minBy","multiply","nth","omit","omitBy","overArgs","pad","padEnd","padStart","parseInt","partial","partialRight","partition","pick","pickBy","propertyOf","pull","pullAll","pullAt","random","range","rangeRight","rearg","reject","remove","repeat","restFrom","result","sampleSize","some","sortBy","sortedIndex","sortedIndexOf","sortedLastIndex","sortedLastIndexOf","sortedUniqBy","split","spreadFrom","startsWith","subtract","sumBy","take","takeRight","takeRightWhile","takeWhile","tap","throttle","thru","times","trimChars","trimCharsEnd","trimCharsStart","truncate","union","uniqBy","uniqWith","unset","unzipWith","without","wrap","xor","zip","zipObject","zipObjectDeep"],3:["assignInWith","assignWith","clamp","differenceBy","differenceWith","findFrom","findIndexFrom","findLastFrom","findLastIndexFrom","getOr","includesFrom","indexOfFrom","inRange","intersectionBy","intersectionWith","invokeArgs","invokeArgsMap","isEqualWith","isMatchWith","flatMapDepth","lastIndexOfFrom","mergeWith","orderBy","padChars","padCharsEnd","padCharsStart","pullAllBy","pullAllWith","rangeStep","rangeStepRight","reduce","reduceRight","replace","set","slice","sortedIndexBy","sortedLastIndexBy","transform","unionBy","unionWith","update","xorBy","xorWith","zipWith"],4:["fill","setWith","updateWith"]},o.aryRearg={2:[1,0],3:[2,0,1],4:[3,2,0,1]},o.iterateeAry={dropRightWhile:1,dropWhile:1,every:1,filter:1,find:1,findFrom:1,findIndex:1,findIndexFrom:1,findKey:1,findLast:1,findLastFrom:1,findLastIndex:1,findLastIndexFrom:
1,findLastKey:1,flatMap:1,flatMapDeep:1,flatMapDepth:1,forEach:1,forEachRight:1,forIn:1,forInRight:1,forOwn:1,forOwnRight:1,map:1,mapKeys:1,mapValues:1,partition:1,reduce:2,reduceRight:2,reject:1,remove:1,some:1,takeRightWhile:1,takeWhile:1,times:1,transform:2},o.iterateeRearg={mapKeys:[1],reduceRight:[1,0]},o.methodRearg={assignInAllWith:[1,0],assignInWith:[1,2,0],assignAllWith:[1,0],assignWith:[1,2,0],differenceBy:[1,2,0],differenceWith:[1,2,0],getOr:[2,1,0],intersectionBy:[1,2,0],intersectionWith:[1,2,0],isEqualWith:[1,2,0],isMatchWith:[2,1,0],mergeAllWith:[1,0],mergeWith:[1,2,0],padChars:[2,1,0],padCharsEnd:[2,1,0],padCharsStart:[2,1,0],pullAllBy:[2,1,0],pullAllWith:[2,1,0],rangeStep:[1,2,0],rangeStepRight:[1,2,0],setWith:[3,1,2,0],sortedIndexBy:[2,1,0],sortedLastIndexBy:[2,1,0],unionBy:[1,2,0],unionWith:[1,2,0],updateWith:[3,1,2,0],xorBy:[1,2,0],xorWith:[1,2,0],zipWith:[1,2,0]},o.methodSpread={assignAll:{start:0},assignAllWith:{start:0},assignInAll:{start:0},assignInAllWith:{start:0},defaultsAll:{start:0},defaultsDeepAll:{start:0},invokeArgs:{start:2},invokeArgsMap:{start:2},mergeAll:{start:0},mergeAllWith:{start:0},partial:{start:1},partialRight:{start:1},without:{start:1},zipAll:{start:0}},o.mutate={array:{fill:!0,pull:!0,pullAll:!0,pullAllBy:!0,pullAllWith:!0,pullAt:!0,remove:!0,reverse:!0},object:{assign:!0,assignAll:!0,assignAllWith:!0,assignIn:!0,assignInAll:!0,assignInAllWith:!0,assignInWith:!0,assignWith:!0,defaults:!0,defaultsAll:!0,defaultsDeep:!0,defaultsDeepAll:!0,merge:!0,mergeAll:!0,mergeAllWith:!0,mergeWith:!0},set:{set:!0,setWith:!0,unset:!0,update:!0,updateWith:!0}},o.realToAlias=function(){var s=Object.prototype.hasOwnProperty,i=o.aliasToReal,a={};for(var u in i){var _=i[u];s.call(a,_)?a[_].push(u):a[_]=[u]}return 
a}(),o.remap={assignAll:"assign",assignAllWith:"assignWith",assignInAll:"assignIn",assignInAllWith:"assignInWith",curryN:"curry",curryRightN:"curryRight",defaultsAll:"defaults",defaultsDeepAll:"defaultsDeep",findFrom:"find",findIndexFrom:"findIndex",findLastFrom:"findLast",findLastIndexFrom:"findLastIndex",getOr:"get",includesFrom:"includes",indexOfFrom:"indexOf",invokeArgs:"invoke",invokeArgsMap:"invokeMap",lastIndexOfFrom:"lastIndexOf",mergeAll:"merge",mergeAllWith:"mergeWith",padChars:"pad",padCharsEnd:"padEnd",padCharsStart:"padStart",propertyOf:"get",rangeStep:"range",rangeStepRight:"rangeRight",restFrom:"rest",spreadFrom:"spread",trimChars:"trim",trimCharsEnd:"trimEnd",trimCharsStart:"trimStart",zipAll:"zip"},o.skipFixed={castArray:!0,flow:!0,flowRight:!0,iteratee:!0,mixin:!0,rearg:!0,runInContext:!0},o.skipRearg={add:!0,assign:!0,assignIn:!0,bind:!0,bindKey:!0,concat:!0,difference:!0,divide:!0,eq:!0,gt:!0,gte:!0,isEqual:!0,lt:!0,lte:!0,matchesProperty:!0,merge:!0,multiply:!0,overArgs:!0,partial:!0,partialRight:!0,propertyOf:!0,random:!0,range:!0,rangeRight:!0,subtract:!0,zip:!0,zipObject:!0,zipObjectDeep:!0}},17255:(s,o,i)=>{var a=i(47422);s.exports=function basePropertyDeep(s){return function(o){return a(o,s)}}},17285:s=>{function source(s){return s?"string"==typeof s?s:s.source:null}function lookahead(s){return concat("(?=",s,")")}function concat(...s){return s.map((s=>source(s))).join("")}function either(...s){return"("+s.map((s=>source(s))).join("|")+")"}s.exports=function xml(s){const o=concat(/[A-Z_]/,function optional(s){return concat("(",s,")?")}(/[A-Z0-9_.-]*:/),/[A-Z0-9_.-]*/),i={className:"symbol",begin:/&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;/},a={begin:/\s/,contains:[{className:"meta-keyword",begin:/#?[a-z_][a-z1-9_-]+/,illegal:/\n/}]},u=s.inherit(a,{begin:/\(/,end:/\)/}),_=s.inherit(s.APOS_STRING_MODE,{className:"meta-string"}),w=s.inherit(s.QUOTE_STRING_MODE,{className:"meta-string"}),x={endsWithParent:!0,illegal:/`]+/}]}]}]};return{name:"HTML, 
XML",aliases:["html","xhtml","rss","atom","xjb","xsd","xsl","plist","wsf","svg"],case_insensitive:!0,contains:[{className:"meta",begin://,relevance:10,contains:[a,w,_,u,{begin:/\[/,end:/\]/,contains:[{className:"meta",begin://,contains:[a,u,w,_]}]}]},s.COMMENT(//,{relevance:10}),{begin://,relevance:10},i,{className:"meta",begin:/<\?xml/,end:/\?>/,relevance:10},{className:"tag",begin:/)/,end:/>/,keywords:{name:"style"},contains:[x],starts:{end:/<\/style>/,returnEnd:!0,subLanguage:["css","xml"]}},{className:"tag",begin:/)/,end:/>/,keywords:{name:"script"},contains:[x],starts:{end:/<\/script>/,returnEnd:!0,subLanguage:["javascript","handlebars","xml"]}},{className:"tag",begin:/<>|<\/>/},{className:"tag",begin:concat(//,/>/,/\s/)))),end:/\/?>/,contains:[{className:"name",begin:o,relevance:0,starts:x}]},{className:"tag",begin:concat(/<\//,lookahead(concat(o,/>/))),contains:[{className:"name",begin:o,relevance:0},{begin:/>/,relevance:0,endsParent:!0}]}]}}},17400:(s,o,i)=>{var a=i(99374),u=1/0;s.exports=function toFinite(s){return s?(s=a(s))===u||s===-1/0?17976931348623157e292*(s<0?-1:1):s==s?s:0:0===s?s:0}},17533:s=>{s.exports=function yaml(s){var o="true false yes no null",i="[\\w#;/?:@&=+$,.~*'()[\\]]+",a={className:"string",relevance:0,variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/\S+/}],contains:[s.BACKSLASH_ESCAPE,{className:"template-variable",variants:[{begin:/\{\{/,end:/\}\}/},{begin:/%\{/,end:/\}/}]}]},u=s.inherit(a,{variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/[^\s,{}[\]]+/}]}),_={className:"number",begin:"\\b[0-9]{4}(-[0-9][0-9]){0,2}([Tt \\t][0-9][0-9]?(:[0-9][0-9]){2})?(\\.[0-9]*)?([ \\t])*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?)?\\b"},w={end:",",endsWithParent:!0,excludeEnd:!0,keywords:o,relevance:0},x={begin:/\{/,end:/\}/,contains:[w],illegal:"\\n",relevance:0},C={begin:"\\[",end:"\\]",contains:[w],illegal:"\\n",relevance:0},j=[{className:"attr",variants:[{begin:"\\w[\\w :\\/.-]*:(?=[ \t]|$)"},{begin:'"\\w[\\w :\\/.-]*":(?=[ 
\t]|$)'},{begin:"'\\w[\\w :\\/.-]*':(?=[ \t]|$)"}]},{className:"meta",begin:"^---\\s*$",relevance:10},{className:"string",begin:"[\\|>]([1-9]?[+-])?[ ]*\\n( +)[^ ][^\\n]*\\n(\\2[^\\n]+\\n?)*"},{begin:"<%[%=-]?",end:"[%-]?%>",subLanguage:"ruby",excludeBegin:!0,excludeEnd:!0,relevance:0},{className:"type",begin:"!\\w+!"+i},{className:"type",begin:"!<"+i+">"},{className:"type",begin:"!"+i},{className:"type",begin:"!!"+i},{className:"meta",begin:"&"+s.UNDERSCORE_IDENT_RE+"$"},{className:"meta",begin:"\\*"+s.UNDERSCORE_IDENT_RE+"$"},{className:"bullet",begin:"-(?=[ ]|$)",relevance:0},s.HASH_COMMENT_MODE,{beginKeywords:o,keywords:{literal:o}},_,{className:"number",begin:s.C_NUMBER_RE+"\\b",relevance:0},x,C,a],L=[...j];return L.pop(),L.push(u),w.contains=L,{name:"YAML",case_insensitive:!0,aliases:["yml"],contains:j}}},17670:(s,o,i)=>{var a=i(12651);s.exports=function mapCacheDelete(s){var o=a(this,s).delete(s);return this.size-=o?1:0,o}},17965:(s,o,i)=>{"use strict";var a=i(16426),u={"text/plain":"Text","text/html":"Url",default:"Text"};s.exports=function copy(s,o){var i,_,w,x,C,j,L=!1;o||(o={}),i=o.debug||!1;try{if(w=a(),x=document.createRange(),C=document.getSelection(),(j=document.createElement("span")).textContent=s,j.ariaHidden="true",j.style.all="unset",j.style.position="fixed",j.style.top=0,j.style.clip="rect(0, 0, 0, 0)",j.style.whiteSpace="pre",j.style.webkitUserSelect="text",j.style.MozUserSelect="text",j.style.msUserSelect="text",j.style.userSelect="text",j.addEventListener("copy",(function(a){if(a.stopPropagation(),o.format)if(a.preventDefault(),void 0===a.clipboardData){i&&console.warn("unable to use e.clipboardData"),i&&console.warn("trying IE specific stuff"),window.clipboardData.clearData();var _=u[o.format]||u.default;window.clipboardData.setData(_,s)}else 
a.clipboardData.clearData(),a.clipboardData.setData(o.format,s);o.onCopy&&(a.preventDefault(),o.onCopy(a.clipboardData))})),document.body.appendChild(j),x.selectNodeContents(j),C.addRange(x),!document.execCommand("copy"))throw new Error("copy command was unsuccessful");L=!0}catch(a){i&&console.error("unable to copy using execCommand: ",a),i&&console.warn("trying IE specific stuff");try{window.clipboardData.setData(o.format||"text",s),o.onCopy&&o.onCopy(window.clipboardData),L=!0}catch(a){i&&console.error("unable to copy using clipboardData: ",a),i&&console.error("falling back to prompt"),_=function format(s){var o=(/mac os x/i.test(navigator.userAgent)?"⌘":"Ctrl")+"+C";return s.replace(/#{\s*key\s*}/g,o)}("message"in o?o.message:"Copy to clipboard: #{key}, Enter"),window.prompt(_,s)}}finally{C&&("function"==typeof C.removeRange?C.removeRange(x):C.removeAllRanges()),j&&document.body.removeChild(j),w()}return L}},18073:(s,o,i)=>{var a=i(85087),u=i(54641),_=i(70981);s.exports=function createRecurry(s,o,i,w,x,C,j,L,B,$){var U=8&o;o|=U?32:64,4&(o&=~(U?64:32))||(o&=-4);var V=[s,o,x,U?C:void 0,U?j:void 0,U?void 0:C,U?void 0:j,L,B,$],z=i.apply(void 0,V);return a(s)&&u(z,V),z.placeholder=w,_(z,s,o)}},19123:(s,o,i)=>{var a=i(65606),u=i(31499),_=i(88310).Stream;function resolve(s,o,i){var a,_=function create_indent(s,o){return new Array(o||0).join(s||"")}(o,i=i||0),w=s;if("object"==typeof s&&((w=s[a=Object.keys(s)[0]])&&w._elem))return w._elem.name=a,w._elem.icount=i,w._elem.indent=o,w._elem.indents=_,w._elem.interrupt=w,w._elem;var x,C=[],j=[];function get_attributes(s){Object.keys(s).forEach((function(o){C.push(function attribute(s,o){return s+'="'+u(o)+'"'}(o,s[o]))}))}switch(typeof w){case"object":if(null===w)break;w._attr&&get_attributes(w._attr),w._cdata&&j.push(("/g,"]]]]>")+"]]>"),w.forEach&&(x=!1,j.push(""),w.forEach((function(s){"object"==typeof 
s?"_attr"==Object.keys(s)[0]?get_attributes(s._attr):j.push(resolve(s,o,i+1)):(j.pop(),x=!0,j.push(u(s)))})),x||j.push(""));break;default:j.push(u(w))}return{name:a,interrupt:!1,attributes:C,content:j,icount:i,indents:_,indent:o}}function format(s,o,i){if("object"!=typeof o)return s(!1,o);var a=o.interrupt?1:o.content.length;function proceed(){for(;o.content.length;){var u=o.content.shift();if(void 0!==u){if(interrupt(u))return;format(s,u)}}s(!1,(a>1?o.indents:"")+(o.name?"":"")+(o.indent&&!i?"\n":"")),i&&i()}function interrupt(o){return!!o.interrupt&&(o.interrupt.append=s,o.interrupt.end=proceed,o.interrupt=!1,s(!0),!0)}if(s(!1,o.indents+(o.name?"<"+o.name:"")+(o.attributes.length?" "+o.attributes.join(" "):"")+(a?o.name?">":"":o.name?"/>":"")+(o.indent&&a>1?"\n":"")),!a)return s(!1,o.indent?"\n":"");interrupt(o)||proceed()}s.exports=function xml(s,o){"object"!=typeof o&&(o={indent:o});var i=o.stream?new _:null,u="",w=!1,x=o.indent?!0===o.indent?" ":o.indent:"",C=!0;function delay(s){C?a.nextTick(s):s()}function append(s,o){if(void 0!==o&&(u+=o),s&&!w&&(i=i||new _,w=!0),s&&w){var a=u;delay((function(){i.emit("data",a)})),u=""}}function add(s,o){format(append,resolve(s,x,x?1:0),o)}function end(){if(i){var s=u;delay((function(){i.emit("data",s),i.emit("end"),i.readable=!1,i.emit("close")}))}}return delay((function(){C=!1})),o.declaration&&function addXmlDeclaration(s){var o={version:"1.0",encoding:s.encoding||"UTF-8"};s.standalone&&(o.standalone=s.standalone),add({"?xml":{_attr:o}}),u=u.replace("/>","?>")}(o.declaration),s&&s.forEach?s.forEach((function(o,i){var a;i+1===s.length&&(a=end),add(o,a)})):add(s,end),i?(i.readable=!0,i):u},s.exports.element=s.exports.Element=function element(){var s={_elem:resolve(Array.prototype.slice.call(arguments)),push:function(s){if(!this.append)throw new Error("not assigned to a parent!");var o=this,i=this._elem.indent;format(this.append,resolve(s,i,this._elem.icount+(i?1:0)),(function(){o.append(!0)}))},close:function(s){void 
0!==s&&this.push(s),this.end&&this.end()}};return s}},19219:s=>{s.exports=function cacheHas(s,o){return s.has(o)}},19287:s=>{"use strict";s.exports={CSSRuleList:0,CSSStyleDeclaration:0,CSSValueList:0,ClientRectList:0,DOMRectList:0,DOMStringList:0,DOMTokenList:1,DataTransferItemList:0,FileList:0,HTMLAllCollection:0,HTMLCollection:0,HTMLFormElement:0,HTMLSelectElement:0,MediaList:0,MimeTypeArray:0,NamedNodeMap:0,NodeList:1,PaintRequestList:0,Plugin:0,PluginArray:0,SVGLengthList:0,SVGNumberList:0,SVGPathSegList:0,SVGPointList:0,SVGStringList:0,SVGTransformList:0,SourceBufferList:0,StyleSheetList:0,TextTrackCueList:0,TextTrackList:0,TouchList:0}},19358:(s,o,i)=>{"use strict";var a=i(85582),u=i(49724),_=i(61626),w=i(88280),x=i(79192),C=i(19595),j=i(54829),L=i(34084),B=i(32096),$=i(39259),U=i(85884),V=i(39447),z=i(7376);s.exports=function(s,o,i,Y){var Z="stackTraceLimit",ee=Y?2:1,ie=s.split("."),ae=ie[ie.length-1],ce=a.apply(null,ie);if(ce){var le=ce.prototype;if(!z&&u(le,"cause")&&delete le.cause,!i)return ce;var pe=a("Error"),de=o((function(s,o){var i=B(Y?o:s,void 0),a=Y?new ce(s):new ce;return void 0!==i&&_(a,"message",i),U(a,de,a.stack,2),this&&w(le,this)&&L(a,this,de),arguments.length>ee&&$(a,arguments[ee]),a}));if(de.prototype=le,"Error"!==ae?x?x(de,pe):C(de,pe,{name:!0}):V&&Z in ce&&(j(de,ce,Z),j(de,ce,"prepareStackTrace")),C(de,ce),!z)try{le.name!==ae&&_(le,"name",ae),le.constructor=de}catch(s){}return de}}},19570:(s,o,i)=>{var a=i(37334),u=i(93243),_=i(83488),w=u?function(s,o){return u(s,"toString",{configurable:!0,enumerable:!1,value:a(o),writable:!0})}:_;s.exports=w},19595:(s,o,i)=>{"use strict";var a=i(49724),u=i(11042),_=i(13846),w=i(74284);s.exports=function(s,o,i){for(var x=u(o),C=w.f,j=_.f,L=0;L{"use strict";var a=i(23034);s.exports=a},19846:(s,o,i)=>{"use strict";var a=i(20798),u=i(98828),_=i(45951).String;s.exports=!!Object.getOwnPropertySymbols&&!u((function(){var s=Symbol("symbol detection");return!_(s)||!(Object(s)instanceof 
Symbol)||!Symbol.sham&&a&&a<41}))},19931:(s,o,i)=>{var a=i(31769),u=i(68090),_=i(68969),w=i(77797);s.exports=function baseUnset(s,o){return o=a(o,s),null==(s=_(s,o))||delete s[w(u(o))]}},20181:(s,o,i)=>{var a=/^\s+|\s+$/g,u=/^[-+]0x[0-9a-f]+$/i,_=/^0b[01]+$/i,w=/^0o[0-7]+$/i,x=parseInt,C="object"==typeof i.g&&i.g&&i.g.Object===Object&&i.g,j="object"==typeof self&&self&&self.Object===Object&&self,L=C||j||Function("return this")(),B=Object.prototype.toString,$=Math.max,U=Math.min,now=function(){return L.Date.now()};function isObject(s){var o=typeof s;return!!s&&("object"==o||"function"==o)}function toNumber(s){if("number"==typeof s)return s;if(function isSymbol(s){return"symbol"==typeof s||function isObjectLike(s){return!!s&&"object"==typeof s}(s)&&"[object Symbol]"==B.call(s)}(s))return NaN;if(isObject(s)){var o="function"==typeof s.valueOf?s.valueOf():s;s=isObject(o)?o+"":o}if("string"!=typeof s)return 0===s?s:+s;s=s.replace(a,"");var i=_.test(s);return i||w.test(s)?x(s.slice(2),i?2:8):u.test(s)?NaN:+s}s.exports=function debounce(s,o,i){var a,u,_,w,x,C,j=0,L=!1,B=!1,V=!0;if("function"!=typeof s)throw new TypeError("Expected a function");function invokeFunc(o){var i=a,_=u;return a=u=void 0,j=o,w=s.apply(_,i)}function shouldInvoke(s){var i=s-C;return void 0===C||i>=o||i<0||B&&s-j>=_}function timerExpired(){var s=now();if(shouldInvoke(s))return trailingEdge(s);x=setTimeout(timerExpired,function remainingWait(s){var i=o-(s-C);return B?U(i,_-(s-j)):i}(s))}function trailingEdge(s){return x=void 0,V&&a?invokeFunc(s):(a=u=void 0,w)}function debounced(){var s=now(),i=shouldInvoke(s);if(a=arguments,u=this,C=s,i){if(void 0===x)return function leadingEdge(s){return j=s,x=setTimeout(timerExpired,o),L?invokeFunc(s):w}(C);if(B)return x=setTimeout(timerExpired,o),invokeFunc(C)}return void 0===x&&(x=setTimeout(timerExpired,o)),w}return o=toNumber(o)||0,isObject(i)&&(L=!!i.leading,_=(B="maxWait"in i)?$(toNumber(i.maxWait)||0,o):_,V="trailing"in 
i?!!i.trailing:V),debounced.cancel=function cancel(){void 0!==x&&clearTimeout(x),j=0,a=C=u=x=void 0},debounced.flush=function flush(){return void 0===x?w:trailingEdge(now())},debounced}},20317:s=>{s.exports=function mapToArray(s){var o=-1,i=Array(s.size);return s.forEach((function(s,a){i[++o]=[a,s]})),i}},20334:(s,o,i)=>{"use strict";var a=i(48287).Buffer;class NonError extends Error{constructor(s){super(NonError._prepareSuperMessage(s)),Object.defineProperty(this,"name",{value:"NonError",configurable:!0,writable:!0}),Error.captureStackTrace&&Error.captureStackTrace(this,NonError)}static _prepareSuperMessage(s){try{return JSON.stringify(s)}catch{return String(s)}}}const u=[{property:"name",enumerable:!1},{property:"message",enumerable:!1},{property:"stack",enumerable:!1},{property:"code",enumerable:!0}],_=Symbol(".toJSON called"),destroyCircular=({from:s,seen:o,to_:i,forceEnumerable:w,maxDepth:x,depth:C})=>{const j=i||(Array.isArray(s)?[]:{});if(o.push(s),C>=x)return j;if("function"==typeof s.toJSON&&!0!==s[_])return(s=>{s[_]=!0;const o=s.toJSON();return delete s[_],o})(s);for(const[i,u]of Object.entries(s))"function"==typeof a&&a.isBuffer(u)?j[i]="[object Buffer]":"function"!=typeof u&&(u&&"object"==typeof u?o.includes(s[i])?j[i]="[Circular]":(C++,j[i]=destroyCircular({from:s[i],seen:o.slice(),forceEnumerable:w,maxDepth:x,depth:C})):j[i]=u);for(const{property:o,enumerable:i}of u)"string"==typeof s[o]&&Object.defineProperty(j,o,{value:s[o],enumerable:!!w||i,configurable:!0,writable:!0});return j};s.exports={serializeError:(s,o={})=>{const{maxDepth:i=Number.POSITIVE_INFINITY}=o;return"object"==typeof s&&null!==s?destroyCircular({from:s,seen:[],forceEnumerable:!0,maxDepth:i,depth:0}):"function"==typeof s?`[Function: ${s.name||"anonymous"}]`:s},deserializeError:(s,o={})=>{const{maxDepth:i=Number.POSITIVE_INFINITY}=o;if(s instanceof Error)return s;if("object"==typeof s&&null!==s&&!Array.isArray(s)){const o=new Error;return 
destroyCircular({from:s,seen:[],to_:o,maxDepth:i,depth:0}),o}return new NonError(s)}}},20426:s=>{var o=Object.prototype.hasOwnProperty;s.exports=function baseHas(s,i){return null!=s&&o.call(s,i)}},20575:(s,o,i)=>{"use strict";var a=i(3121);s.exports=function(s){return a(s.length)}},20798:(s,o,i)=>{"use strict";var a,u,_=i(45951),w=i(96794),x=_.process,C=_.Deno,j=x&&x.versions||C&&C.version,L=j&&j.v8;L&&(u=(a=L.split("."))[0]>0&&a[0]<4?1:+(a[0]+a[1])),!u&&w&&(!(a=w.match(/Edge\/(\d+)/))||a[1]>=74)&&(a=w.match(/Chrome\/(\d+)/))&&(u=+a[1]),s.exports=u},20850:(s,o,i)=>{"use strict";s.exports=i(46076)},20999:(s,o,i)=>{var a=i(69302),u=i(36800);s.exports=function createAssigner(s){return a((function(o,i){var a=-1,_=i.length,w=_>1?i[_-1]:void 0,x=_>2?i[2]:void 0;for(w=s.length>3&&"function"==typeof w?(_--,w):void 0,x&&u(i[0],i[1],x)&&(w=_<3?void 0:w,_=1),o=Object(o);++a<_;){var C=i[a];C&&s(o,C,a,w)}return o}))}},21549:(s,o,i)=>{var a=i(22032),u=i(63862),_=i(66721),w=i(12749),x=i(35749);function Hash(s){var o=-1,i=null==s?0:s.length;for(this.clear();++o{var a=i(16547),u=i(43360);s.exports=function copyObject(s,o,i,_){var w=!i;i||(i={});for(var x=-1,C=o.length;++x{var a=i(51873),u=i(37828),_=i(75288),w=i(25911),x=i(20317),C=i(84247),j=a?a.prototype:void 0,L=j?j.valueOf:void 0;s.exports=function equalByTag(s,o,i,a,j,B,$){switch(i){case"[object DataView]":if(s.byteLength!=o.byteLength||s.byteOffset!=o.byteOffset)return!1;s=s.buffer,o=o.buffer;case"[object ArrayBuffer]":return!(s.byteLength!=o.byteLength||!B(new u(s),new u(o)));case"[object Boolean]":case"[object Date]":case"[object Number]":return _(+s,+o);case"[object Error]":return s.name==o.name&&s.message==o.message;case"[object RegExp]":case"[object String]":return s==o+"";case"[object Map]":var U=x;case"[object Set]":var V=1&a;if(U||(U=C),s.size!=o.size&&!V)return!1;var z=$.get(s);if(z)return z==o;a|=2,$.set(s,o);var Y=w(U(s),U(o),a,j,B,$);return $.delete(s),Y;case"[object Symbol]":if(L)return 
L.call(s)==L.call(o)}return!1}},22032:(s,o,i)=>{var a=i(81042);s.exports=function hashClear(){this.__data__=a?a(null):{},this.size=0}},22225:s=>{var o="\\ud800-\\udfff",i="\\u2700-\\u27bf",a="a-z\\xdf-\\xf6\\xf8-\\xff",u="A-Z\\xc0-\\xd6\\xd8-\\xde",_="\\xac\\xb1\\xd7\\xf7\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf\\u2000-\\u206f \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000",w="["+_+"]",x="\\d+",C="["+i+"]",j="["+a+"]",L="[^"+o+_+x+i+a+u+"]",B="(?:\\ud83c[\\udde6-\\uddff]){2}",$="[\\ud800-\\udbff][\\udc00-\\udfff]",U="["+u+"]",V="(?:"+j+"|"+L+")",z="(?:"+U+"|"+L+")",Y="(?:['’](?:d|ll|m|re|s|t|ve))?",Z="(?:['’](?:D|LL|M|RE|S|T|VE))?",ee="(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?",ie="[\\ufe0e\\ufe0f]?",ae=ie+ee+("(?:\\u200d(?:"+["[^"+o+"]",B,$].join("|")+")"+ie+ee+")*"),ce="(?:"+[C,B,$].join("|")+")"+ae,le=RegExp([U+"?"+j+"+"+Y+"(?="+[w,U,"$"].join("|")+")",z+"+"+Z+"(?="+[w,U+V,"$"].join("|")+")",U+"?"+V+"+"+Y,U+"+"+Z,"\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])","\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])",x,ce].join("|"),"g");s.exports=function unicodeWords(s){return s.match(le)||[]}},22551:(s,o,i)=>{"use strict";var a=i(96540),u=i(69982);function p(s){for(var o="https://reactjs.org/docs/error-decoder.html?invariant="+s,i=1;i