Repository: weaviate/Verba Branch: main Commit: 6fb7c98d729b Files: 295 Total size: 13.9 MB Directory structure: gitextract__w7iadsp/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── verba-feature-template.md │ │ └── verba-issue-template.md │ └── workflows/ │ └── docker-image.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── FRONTEND.md ├── LICENSE ├── MANIFEST.in ├── PYTHON_TUTORIAL.md ├── README.md ├── TECHNICAL.md ├── docker-compose.yml ├── frontend/ │ ├── .eslintrc.json │ ├── .gitignore │ ├── app/ │ │ ├── api.ts │ │ ├── components/ │ │ │ ├── Chat/ │ │ │ │ ├── ChatConfig.tsx │ │ │ │ ├── ChatInterface.tsx │ │ │ │ ├── ChatMessage.tsx │ │ │ │ ├── ChatView.tsx │ │ │ │ └── StatusLabel.tsx │ │ │ ├── Document/ │ │ │ │ ├── ChunkView.tsx │ │ │ │ ├── ContentView.tsx │ │ │ │ ├── DocumentExplorer.tsx │ │ │ │ ├── DocumentMetaView.tsx │ │ │ │ ├── DocumentSearch.tsx │ │ │ │ ├── DocumentView.tsx │ │ │ │ ├── VectorView.tsx │ │ │ │ └── util.ts │ │ │ ├── Ingestion/ │ │ │ │ ├── BasicSettingView.tsx │ │ │ │ ├── ComponentView.tsx │ │ │ │ ├── ConfigurationView.tsx │ │ │ │ ├── FileComponent.tsx │ │ │ │ ├── FileSelectionView.tsx │ │ │ │ └── IngestionView.tsx │ │ │ ├── Login/ │ │ │ │ ├── GettingStarted.tsx │ │ │ │ └── LoginView.tsx │ │ │ ├── Navigation/ │ │ │ │ ├── InfoComponent.tsx │ │ │ │ ├── NavButton.tsx │ │ │ │ ├── NavbarComponent.tsx │ │ │ │ ├── StatusMessenger.tsx │ │ │ │ ├── UserModal.tsx │ │ │ │ ├── VerbaButton.tsx │ │ │ │ └── util.ts │ │ │ └── Settings/ │ │ │ ├── InfoView.tsx │ │ │ ├── SettingsComponent.tsx │ │ │ ├── SettingsView.tsx │ │ │ └── SuggestionView.tsx │ │ ├── globals.css │ │ ├── layout.tsx │ │ ├── page.tsx │ │ ├── types.ts │ │ └── util.ts │ ├── glsl.d.ts │ ├── next.config.js │ ├── package.json │ ├── postcss.config.js │ ├── public/ │ │ ├── alps_field_1k.hdr │ │ ├── cloudy.hdr │ │ ├── macbook.gltf │ │ ├── shaders/ │ │ │ ├── includes/ │ │ │ │ └── simplexNoise4d.glsl │ │ │ └── wobble/ │ │ │ ├── fragment.glsl │ │ │ └── vertex.glsl │ │ ├── verba.glb │ │ 
├── verba.gltf │ │ └── weaviate.glb │ ├── tailwind.config.ts │ └── tsconfig.json ├── goldenverba/ │ ├── __init__.py │ ├── components/ │ │ ├── __init__.py │ │ ├── chunk.py │ │ ├── chunking/ │ │ │ ├── CodeChunker.py │ │ │ ├── HTMLChunker.py │ │ │ ├── JSONChunker.py │ │ │ ├── MarkdownChunker.py │ │ │ ├── RecursiveChunker.py │ │ │ ├── SemanticChunker.py │ │ │ ├── SentenceChunker.py │ │ │ ├── TokenChunker.py │ │ │ ├── __init__.py │ │ │ └── chunking_examples.py │ │ ├── document.py │ │ ├── embedding/ │ │ │ ├── CohereEmbedder.py │ │ │ ├── GoogleEmbedder.py │ │ │ ├── OllamaEmbedder.py │ │ │ ├── OpenAIEmbedder.py │ │ │ ├── SentenceTransformersEmbedder.py │ │ │ ├── UpstageEmbedder.py │ │ │ ├── VoyageAIEmbedder.py │ │ │ ├── WeaviateEmbedder.py │ │ │ └── __init__.py │ │ ├── generation/ │ │ │ ├── AnthrophicGenerator.py │ │ │ ├── CohereGenerator.py │ │ │ ├── GeminiGenerator.py │ │ │ ├── GroqGenerator.py │ │ │ ├── NovitaGenerator.py │ │ │ ├── OllamaGenerator.py │ │ │ ├── OpenAIGenerator.py │ │ │ ├── UpstageGenerator.py │ │ │ └── __init__.py │ │ ├── interfaces.py │ │ ├── managers.py │ │ ├── reader/ │ │ │ ├── AssemblyAIAPI.py │ │ │ ├── BasicReader.py │ │ │ ├── FirecrawlReader.py │ │ │ ├── GitReader.py │ │ │ ├── HTMLReader.py │ │ │ ├── UnstructuredAPI.py │ │ │ ├── UpstageDocumentParse.py │ │ │ └── __init__.py │ │ ├── retriever/ │ │ │ ├── WindowRetriever.py │ │ │ └── __init__.py │ │ ├── types.py │ │ └── util.py │ ├── server/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── cli.py │ │ ├── frontend/ │ │ │ └── out/ │ │ │ ├── 404.html │ │ │ ├── _next/ │ │ │ │ └── static/ │ │ │ │ ├── -4xCNh8fW_auOZGuG7OPj/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── 30i2FQTbR7Y4f-UfM6hbP/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── 4ajn2kvxlVqvDvSUh7-up/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── 5piNIBwi9EVvWEGUCt1HJ/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── 5rVRk1H0CSx_t9B72OCkV/ │ │ │ │ 
│ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── CZvV6ohXKOaM2HZQjSr_e/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── EK3SdW_8_2ZVH1NNgqlNv/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── FEflRPdxwBXOroRwOpEz6/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── FcSX1HknjNhe9H0xxw_uy/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── I01L2Qf2M5E8rKySVdfwz/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── KoV36dmdEgYXG0yvTGe3m/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── MYxb6oMzMgqonOwG97TqG/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── QPYBqPy_EgcZIfNhJUgzL/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── SqhqgckvG9-Sxn3_Nq2Rt/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── VZK8jRGqcSpcMf2ZOPpII/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── Y3oNq6sdVkiLcuMaChJwW/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── _LnFD6u1WPj3rRxWv1Mp3/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── b8NLMEMd7UPwgQ6cqzdXK/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── chunks/ │ │ │ │ │ ├── 07115393-160e96bd79d1e493.js │ │ │ │ │ ├── 07115393-8a796b5d068e3710.js │ │ │ │ │ ├── 117-2e3ee50cbb6fc8da.js │ │ │ │ │ ├── 12038df7-a10f1b9476e02872.js │ │ │ │ │ ├── 12038df7-bbbca262706a9194.js │ │ │ │ │ ├── 12038df7-bed05817827b4802.js │ │ │ │ │ ├── 23-2fc6611658866471.js │ │ │ │ │ ├── 23-b149ce429217dd65.js │ │ │ │ │ ├── 39209d7c-1514aaef8caa025b.js │ │ │ │ │ ├── 39209d7c-9eeaeffa37c8b51e.js │ │ │ │ │ ├── 39209d7c-f601708f0876481f.js │ │ │ │ │ ├── 39aecf79-2d831b66f40a6478.js │ │ │ │ │ ├── 39aecf79-474a3d3c48835c08.js │ │ │ │ │ ├── 39aecf79-8d523cb23043db9d.js │ │ │ │ │ ├── 472688b4-2c611c3f7f6780f5.js │ │ │ │ │ ├── 
472688b4-f8d4a7fb6705064c.js │ │ │ │ │ ├── 48507feb-c058e15125ba1e58.js │ │ │ │ │ ├── 48507feb-cb362b7475ba2891.js │ │ │ │ │ ├── 48507feb-fb2bbcdbd4adc32f.js │ │ │ │ │ ├── 4f53ad1b-af82feddb1367bb5.js │ │ │ │ │ ├── 4f53ad1b-cf02b0f93cf25527.js │ │ │ │ │ ├── 4f53ad1b-f1821b28dab6f189.js │ │ │ │ │ ├── 4f9d9cd8-0af0a6a5e34c4b59.js │ │ │ │ │ ├── 4f9d9cd8-7217d0ac0bfcc7ce.js │ │ │ │ │ ├── 514-cff62b6f7919676e.js │ │ │ │ │ ├── 5349c568-74bb20b84335457b.js │ │ │ │ │ ├── 5349c568-e26f968c6773abca.js │ │ │ │ │ ├── 737dfa3e-71fd4aa07f7d84a6.js │ │ │ │ │ ├── 789-e4deefde6e1de3c8.js │ │ │ │ │ ├── 864-15201e63d2e174b9.js │ │ │ │ │ ├── 8dc5345f-59beaec077e947c5.js │ │ │ │ │ ├── 8dc5345f-9a36b2be5ef7459c.js │ │ │ │ │ ├── 8dc5345f-dfd13b91b6647830.js │ │ │ │ │ ├── 8e68d877-65c524a5e56473c0.js │ │ │ │ │ ├── 8e68d877-6d32d357377fbcf8.js │ │ │ │ │ ├── 9081a741-3f102f6aee474fd0.js │ │ │ │ │ ├── 9081a741-52789bf6b11470c5.js │ │ │ │ │ ├── 9081a741-560e37099622c351.js │ │ │ │ │ ├── 9081a741-599fa98ab2a737de.js │ │ │ │ │ ├── 9081a741-a7c6599b4221aee8.js │ │ │ │ │ ├── 949-4b7f5a091d97fe2c.js │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── _not-found/ │ │ │ │ │ │ │ ├── page-26d6c07435b6028b.js │ │ │ │ │ │ │ ├── page-aaadc9fa354a98b4.js │ │ │ │ │ │ │ └── page-d0fe2d040a50a096.js │ │ │ │ │ │ ├── layout-213bbf4a992a0fc8.js │ │ │ │ │ │ ├── layout-ad434663ed5b1d3a.js │ │ │ │ │ │ ├── page-09caddcec70c8f21.js │ │ │ │ │ │ ├── page-107cfe4c05eff918.js │ │ │ │ │ │ ├── page-1afb38c40393046f.js │ │ │ │ │ │ ├── page-235aaff141d9263e.js │ │ │ │ │ │ ├── page-25ed1cb73822cf4e.js │ │ │ │ │ │ ├── page-317294c5dcc5eacf.js │ │ │ │ │ │ ├── page-345642b96638188b.js │ │ │ │ │ │ ├── page-3617b8292b21fc34.js │ │ │ │ │ │ ├── page-45b34f73d34e8979.js │ │ │ │ │ │ ├── page-54094d73aee6e252.js │ │ │ │ │ │ ├── page-5e9b74ed5a1be60e.js │ │ │ │ │ │ ├── page-61a9fa55e8011365.js │ │ │ │ │ │ ├── page-63c3d501186e1569.js │ │ │ │ │ │ ├── page-8184afaea67792bd.js │ │ │ │ │ │ ├── page-a985ad06a3dcc355.js │ │ │ │ │ │ ├── 
page-b8f5daf1c95250b5.js │ │ │ │ │ │ ├── page-c506ad2929c6e6a6.js │ │ │ │ │ │ ├── page-de101b2b6802134a.js │ │ │ │ │ │ ├── page-e15dc604b4004fa4.js │ │ │ │ │ │ ├── page-f09784266b0febdd.js │ │ │ │ │ │ ├── page-f3aef3f90382cf14.js │ │ │ │ │ │ ├── page-f7e98beb8ba64d62.js │ │ │ │ │ │ └── page-fc144a2905205d63.js │ │ │ │ │ ├── b536a0f1-cb79989225eaf318.js │ │ │ │ │ ├── b536a0f1-d8ed58bb9dee20ad.js │ │ │ │ │ ├── b536a0f1-ea1620b5a816a5f5.js │ │ │ │ │ ├── bc9c3264-007b030a3d8b973f.js │ │ │ │ │ ├── bc9c3264-ebee20cfb15b0079.js │ │ │ │ │ ├── ec3863c0-06201a77cd4ecf61.js │ │ │ │ │ ├── ec3863c0-0f36e1c42d4712ac.js │ │ │ │ │ ├── ec3863c0-3299d7812f924fb2.js │ │ │ │ │ ├── ec3863c0-666dbee8e5822499.js │ │ │ │ │ ├── ec3863c0-b59cee7fa657bb4d.js │ │ │ │ │ ├── fd9d1056-3c0a5e4377f054b9.js │ │ │ │ │ ├── fd9d1056-5afc1fda0426cd7c.js │ │ │ │ │ ├── fd9d1056-aa9b9ca480dc9276.js │ │ │ │ │ ├── framework-00a8ba1a63cfdc9e.js │ │ │ │ │ ├── main-0806fb9f50640b69.js │ │ │ │ │ ├── main-ab370d8db521f1bb.js │ │ │ │ │ ├── main-app-6d8fe3bc29305481.js │ │ │ │ │ ├── main-app-8b51a742f61d77e1.js │ │ │ │ │ ├── pages/ │ │ │ │ │ │ ├── _app-037b5d058bd9a820.js │ │ │ │ │ │ ├── _app-15e2daefa259f0b5.js │ │ │ │ │ │ ├── _error-28b803cb2479b966.js │ │ │ │ │ │ └── _error-6ae619510b1539d6.js │ │ │ │ │ ├── polyfills-42372ed130431b0a.js │ │ │ │ │ ├── polyfills-78c92fac7aa8fdd8.js │ │ │ │ │ ├── webpack-e6d8fc8882b3cc5a.js │ │ │ │ │ └── webpack-f81a484e456a776b.js │ │ │ │ ├── cpNG_krRh40F8ypWaUA1Y/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── css/ │ │ │ │ │ ├── 0ffc59f004ebeefe.css │ │ │ │ │ ├── 2817a3d3bb800805.css │ │ │ │ │ ├── ae5cf92d392439f8.css │ │ │ │ │ ├── b219313ebc6bfc74.css │ │ │ │ │ ├── b6698fd034c37467.css │ │ │ │ │ ├── b7332ce1649563ef.css │ │ │ │ │ ├── bbb58f872eab5c8f.css │ │ │ │ │ ├── d919237d8b9336c0.css │ │ │ │ │ ├── fb11073f70ca561d.css │ │ │ │ │ └── fd8d77da4d075ee8.css │ │ │ │ ├── gjEH1UGrJtEx0lJhUhCoG/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── 
_ssgManifest.js │ │ │ │ ├── ifjjNsXVlVkzB0f2yWOlr/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── loIKTVc-LZ1tLrqnZ0_At/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── pR7o7Axy7wuvopYTQQ4gp/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── ppWjoMOwBDfvgLeNC4RNs/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── sJVEctNLIfCe2KAU81egg/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ ├── tVocASfF6DGpJwQWyOdO9/ │ │ │ │ │ ├── _buildManifest.js │ │ │ │ │ └── _ssgManifest.js │ │ │ │ └── yPC90wje_7V701wv-Jubb/ │ │ │ │ ├── _buildManifest.js │ │ │ │ └── _ssgManifest.js │ │ │ ├── alps_field_1k.hdr │ │ │ ├── cloudy.hdr │ │ │ ├── index.html │ │ │ ├── index.txt │ │ │ ├── macbook.gltf │ │ │ ├── shaders/ │ │ │ │ ├── includes/ │ │ │ │ │ └── simplexNoise4d.glsl │ │ │ │ └── wobble/ │ │ │ │ ├── fragment.glsl │ │ │ │ └── vertex.glsl │ │ │ ├── verba.glb │ │ │ ├── verba.gltf │ │ │ └── weaviate.glb │ │ ├── helpers.py │ │ └── types.py │ ├── tests/ │ │ └── document/ │ │ └── test_document.py │ └── verba_manager.py ├── pypi_commands.sh └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/verba-feature-template.md ================================================ --- name: Verba Feature Template about: Request a new feature for Verba title: "" labels: "enhancement" assignees: "" --- ## Description ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/verba-issue-template.md ================================================ --- name: Verba Issue Template about: Encountering errors or issues with Verba title: "" labels: "" assignees: "" --- ## Description ## Installation - [ ] pip install goldenverba - [ ] pip install from source - [ ] Docker installation If you installed via pip, 
please specify the version: ## Weaviate Deployment - [ ] Local Deployment - [ ] Docker Deployment - [ ] Cloud Deployment ## Configuration Reader: Chunker: Embedder: Retriever: Generator: ## Steps to Reproduce ## Additional context ================================================ FILE: .github/workflows/docker-image.yml ================================================ name: Docker Build and Push on: push: branches: ["main"] jobs: build-and-push: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to DockerHub uses: docker/login-action@v3 with: username: ${{secrets.DOCKER_USERNAME}} password: ${{secrets.DOCKER_PASSWORD}} - name: Build and push uses: docker/build-push-action@v4 with: context: . file: ./Dockerfile push: true tags: semitechnologies/verba:latest platforms: linux/amd64,linux/arm64 ================================================ FILE: .gitignore ================================================ .env .env* __pycache__ .DS_Store .pytest_cache .python-version *.egg-info venv venv* dist build ~ .local .cache .verba .vscode verba_config.json text-generation-inference test.py cache.txt .ruff_cache *_secrets.json ollama ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. ## [2.1.3] More data types ## Added - Added `OLLAMA_MODEL` and `OLLAMA_EMBED_MODEL` environment variables (https://github.com/weaviate/Verba/pull/372) - Hiding `Getting Started` display after showing once - Added support for `csv` `xlsx` `xls` for the `DefaultReader` ## [2.1.2] Adding Novita! 
## Added - Added Novita Generator (https://www.novita.ai/) - Added basic tests for Document class ## Fixed - spaCy Language Issues (https://github.com/weaviate/Verba/issues/359#issuecomment-2612233766) (https://github.com/weaviate/Verba/issues/352) ## [2.1.1] More Bugs! ## Added - Dynamic model name retrieval for OpenAI Generator based on OpenAI URL and API Key (https://github.com/weaviate/Verba/issues/123) (https://github.com/weaviate/Verba/issues/362) ## [2.1.0] Fixing Bugs and Adding Friends ## Added - Added Upstage: Reader, Embedder, Generator (https://www.upstage.ai/) - Added new deployment type: Custom - Added new port configuration (https://github.com/weaviate/Verba/issues/308) - Added Groq (https://github.com/weaviate/Verba/pull/278) - AssemblyAI Reader for audio files (https://github.com/weaviate/Verba/pull/283) - Language Detection for languages like Chinese, English, French, German and Dutch (https://github.com/weaviate/Verba/pull/302) - Improve Markdown Chunking (https://github.com/weaviate/Verba/pull/323) - Upgrade to latest Weaviate Client - Added Ollama to Docker Compose - Added Verba to Docker Hub - Added default deployment to skip login screen (https://github.com/weaviate/Verba/issues/305) ## Fixed - Catch Exception when trying to access the OpenAI API Embedding endpoint to retrieve model names - Fixed reading empty string as environment variables (https://github.com/weaviate/Verba/pull/300) - Fixed default Unstructured URL (https://github.com/weaviate/Verba/pull/295) - Changed Collection names to prevent conflicts with collections generated by older Verba versions - Ensure Ollama URL is parsed correctly (https://github.com/weaviate/Verba/pull/327) - Fixing typos (https://github.com/weaviate/Verba/pull/329) - System Message is now a textarea in the frontend (https://github.com/weaviate/Verba/issues/334) - Race Condition when multiple requests try to create same client (https://github.com/weaviate/Verba/issues/335) - Fix wrong data types when 
querying additional chunks - Remove legacy code (https://github.com/weaviate/Verba/issues/284) - Change Python version range below 3.13 - Firecrawl Reader missing metadata (https://github.com/weaviate/Verba/issues/280) ## [2.0.0] Importastic ## Added - Async Ingestion with realtime logging - Migrated to Weaviate v4 Client - Added new File Selection Interface - Add Directory Upload - Control Settings per file/url individually - Import individual files or all - Overwrite existing files - Add multiple labels to documents - More configuration for readers, chunkers, and embedders - Improved Document Search UI - Add Config Validation - Add HTML Reader - Add Recursive Chunker - HTML Chunker - Markdown Chunker - Code Import - Code Chunking - Semantic Chunking - Label Filter - Document Filter (Add document to chat) - Add more themes - Reworked Admin Interface - Added Suggestion View - Reworked Suggestion logic - Added VoyageAI - Added custom metadata - Added DocumentExplorer with - Content View - Chunk View - Vector View - Visualize vectors of chunks of one or multiple documents - PCA ## [1.0.3] ## Added - Cancel Generation Button - Added .docx support - Added Documentation for JSON Files - Added GitLabReader (https://github.com/weaviate/Verba/pull/151) - Improved HuggingFace Embedding Models thanks to @tomaarsen - MixedBreadEmbedder - AllMPNetEmbedder ## Fixed - Check error logs coming from Ollama and send them to the frontend - Check If Chunks Are NoneType ## [1.0.2] ## Added - Readme Variable: OPENAI_BASE_URL ## Fixed - https://github.com/weaviate/Verba/pull/173 - https://github.com/weaviate/Verba/pull/163 - https://github.com/weaviate/Verba/pull/148 ## [1.0.0] - Beautiful Verba Update ### Added - Added DaisyUI - Optimized frontend codebase - Fully Reworked Verba Design - Fully Responsive, optimized for all screen sizes - Customization Capabilities - Added Default, Darkmode, Weaviate themes - Full text, color, image customization - Improve Chat Interface - Better 
formatting of markdown + code - Keep conversations saved in the browser's local storage - Better Debugging by providing more information about current states - Improve Document Viewer Interface - Add Pagination - Add Sorting - Use Aggregation for Filtering - Improve Status Overview - Reworked Frontend + Optimize Code - Sort status entries - Improve Loading Speed by using Aggregation - Improve Component Selection for both Ingestion and RAG - Added new configuration that will be passed between frontend and backend - Cleaned codebase, merged interfaces and managers to single files - Added clean endpoints for better code readability - Reworked interfaces - Added better console and logging for ingestion - More Configuration - Enable/Disable Caching and Autocomplete Suggestions - Improved verba_config.json - Ability to enable/disable caching + autosuggestions - Add Google Gemini as new Embedder and Generator - Added .CSV support (all file types available in Unstructured IO) - More test data - Add Ollama as Generator and Embedding Component - Add Support for Cohere R+ - Improved WindowRetriever Context Generation - Show RAW Context in Frontend + Save in LocalStorage - Save Settings and Configuration in Weaviate ### Changed - Changed to AppRouter framework - Changed frontend project structure - Changed backend project structure - Removed Llama Generator Component ### Fixed - Using Accelerator Library ## [0.4.0] - 11.04.2024 ### Added - Improved Docker Documentation - Improved Docker Settings - New Environment Variables for OpenAI proxies: OPENAI_BASE_URL (LiteLLM support) (https://github.com/weaviate/Verba/issues/56) - Increased version ### Changed - Removed spaCy from project ### Fixed - Python not working on version 3.12, 3.11, and 3.9 - GitHub Links on README - Fix Docker Default Vectorizer (https://github.com/weaviate/Verba/issues/50) - Fix requirements.txt spelling error - Minor Bug fixes ## [0.3.1] - 15.11.2023 ### Added - PDFReader powered by PyPDF2 - TokenChunker 
powered by tiktoken - Ruff Linting (set as pre-commit) - Markdown Formatting for chat messages (https://github.com/weaviate/Verba/issues/48) ### Fixed - Added missing dependencies - Fixed restart bug - Fixed MiniLM Cuda to_device bug (https://github.com/weaviate/Verba/issues/41) - Fixed Config Issues (https://github.com/weaviate/Verba/issues/51) - Fixed Weaviate Embedded Headers for Cohere ## [0.3.0] - 12.09.2023 ### Added - Refactor modular architecture - Add ability to import data through the frontend, CLI, and script - Add Readers (SimpleReader, PathReader, GithubReader, PDFReader) - Add Chunkers (WordChunker, SentenceChunker) - Add Embedders (ADAEmbedder, SentenceTransformer, Cohere) - Add Generators (GPT3, GPT4, Llama, Cohere) - Status Page - Reset functionality - Streaming Token Generation - Lazy Document Loading - Add Copy and Cached Tag - Improved Semantic Cache - Added Llama 2 and Cohere support - Added new OpenAI models - Improved Documentation - Added technical docs and contribution guidelines ### Fixed - Error handling for data ingestion (handling chunk size) - Schema handling on startup ### Changed - Removed Simple- and AdvancedEngine logic ## [0.2.3] - 05.09.2023 ### Added - OpenAI API documentation example dataset ## [0.2.2] - 31.08.2023 ### Release! - First version of Verba released! (many more to come :) ### Added - Verba favicon ### Fixed - Add static files to package - Weaviate Embedded not shutting down ## [0.1.0] - 29.08.2023 ### Added - Prepare Verba for first release ================================================ FILE: CONTRIBUTING.md ================================================ # Verba Contribution Guidelines Welcome to the Verba community! We're thrilled that you're interested in contributing to the Verba project. Verba is a collaborative open-source project, and we believe that everyone has something unique to contribute. 
Below you'll find our guidelines, which aim to make contributing to Verba a respectful and pleasant experience for everyone. ## 🌟 Community and Open Source Open source is at the heart of Verba. We appreciate feedback, ideas, and enhancements from the community. Whether you're looking to fix a bug, add a new feature, or simply improve the documentation, your contribution is important to us. ## 📚 Before You Begin Before contributing, please take a moment to read through the [README](https://github.com/weaviate/Verba/blob/main/README.md) and the [Technical Documentation](https://github.com/weaviate/Verba/blob/main/TECHNICAL.md). These documents provide a comprehensive understanding of the project and are essential reading to ensure that we're all on the same page. Please note that the technical documentation is a work in progress and will be updated as we progress. ## 🐛 Reporting Issues If you've identified a bug or have an idea for an enhancement, please begin by creating an Issue. Here's how: - Check the Issue tracker to ensure the bug or enhancement hasn't already been reported. - Clearly describe the issue including steps to reproduce when it is a bug. - Include as much relevant information as possible. ## 💡 Ideas and Feedback We welcome all ideas and feedback. If you're not ready to open an Issue or if you're just looking for a place to discuss ideas, head over to our [GitHub Discussions](https://github.com/weaviate/Verba/discussions) or the [Weaviate Support Page](https://forum.weaviate.io/). ## 🧪 Testing We use [pytest](https://docs.pytest.org) for testing. Please note that the tests are WIP and some are missing. We still encourage you to run the tests and add more tests as you see fit. To run the tests, use the following command: ```bash pytest goldenverba/tests ``` ## 📝 Pull Requests If you're ready to contribute code or documentation, please submit a Pull Request (PR) to the dev branch. Here's the process: - Fork the repository and create your branch from `main`. 
- Ensure that your code adheres to the existing code style. Use [Black](https://github.com/psf/black) for formatting Python code. - If you're adding a new feature, consider writing unit tests and documenting the feature. - Verify that your changes pass existing unit tests. - Make sure your code lints (mypy compatibility is optional but encouraged). - Include a clear description of your changes in the PR. - Link to the Issue in your PR description. ### 🔄 Pull Request Process - PRs are reviewed on a regular basis. - Engage in the conversation and make requested updates to your PR if needed. - Once approved, your PR will be merged into the main branch by a maintainer. ## 🗨️ Stay Connected We encourage you to join our community channels. Stay connected, share ideas, and get to know fellow contributors. Thank you for being a part of Verba. Your contributions not only help improve the project but also the wider community of users and developers. Happy contributing! ================================================ FILE: Dockerfile ================================================ FROM python:3.11 WORKDIR /Verba COPY . /Verba RUN pip install '.' EXPOSE 8000 CMD ["verba", "start","--port","8000","--host","0.0.0.0"] ================================================ FILE: FRONTEND.md ================================================ # Verba - Frontend Documentation Verba's Frontend is a [NextJS](https://nextjs.org/) application used together with [TailwindCSS](https://tailwindcss.com/) and [DaisyUI](https://daisyui.com/). ## 🚀 Setting Up the Frontend To get your local copy of the Verba frontend up and running, please follow these simple steps: 1. **Clone the Repository**: ```bash git clone https://github.com/weaviate/Verba.git ``` 2. **Node.js Requirement**: - Confirm that Node.js version `>=21.3.0` is installed on your system. If you need to install or update Node.js, visit the official [Node.js website](https://nodejs.org/). 3. 
**Installation**: - Navigate to the frontend directory: `cd frontend` - Run `npm install` to install the dependencies required for the project. 4. **Development Server**: - Launch the application in development mode by executing `npm run dev`. - Open your web browser and visit `http://localhost:3000` to view the application. ## 📦 Building Static Pages for FastAPI If you wish to serve and update the frontend through FastAPI, you need to build static pages: 1. **Build Process**: - Execute `npm run build` to generate the static production build. The output will be directed to the FastAPI folder configured to serve the static content. ================================================ FILE: LICENSE ================================================ Copyright (c) 2020-2023, Weaviate B.V. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: MANIFEST.in ================================================ recursive-include goldenverba/server/frontend/out * ================================================ FILE: PYTHON_TUTORIAL.md ================================================ # Installing Python and Setting Up a Virtual Environment Before you can use Verba, you'll need to ensure that `Python >=3.10.0` is installed on your system and that you can create a virtual environment for a safer and cleaner project setup. ## Installing Python Python is required to run Verba. If you don't have Python installed, follow these steps: ### For Windows: Download the latest Python installer from the official Python website. Run the installer and make sure to check the box that says `Add Python to PATH` during installation. ### For macOS: You can install Python using Homebrew, a package manager for macOS, with the following command in the terminal: ``` /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" ``` Then install Python: ``` brew install python ``` ### For Linux: Python usually comes pre-installed on most Linux distributions. If it's not, you can install it using your distribution's package manager. 
You can read more about it [here](https://opensource.com/article/20/4/install-python-linux) ## Setting Up a Virtual Environment It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide Python packages. ### Install the virtualenv package: First, ensure you have pip installed (it comes with Python if you're using version 3.4 and above). Install virtualenv by running: ``` pip install virtualenv ``` ### Create a Virtual Environment: Navigate to your project's directory in the terminal. Run the following command to create a virtual environment named venv (you can name it anything you like): ``` python3 -m virtualenv venv ``` ### Activate the Virtual Environment: - On Windows, activate the virtual environment by running: ``` venv\Scripts\activate.bat ``` - On macOS and Linux, activate it with: ``` source venv/bin/activate ``` Once your virtual environment is activated, you'll see its name in the terminal prompt. Now you're ready to install Verba using the steps provided in the Quickstart sections. > Remember to deactivate the virtual environment when you're done working with Verba by simply running deactivate in the terminal. 
================================================ FILE: README.md ================================================ # Verba ## The Golden RAGtriever - Community Edition ✨ [![Weaviate](https://img.shields.io/static/v1?label=powered%20by&message=Weaviate%20%E2%9D%A4&color=green&style=flat-square)](https://weaviate.io/) [![PyPi downloads](https://static.pepy.tech/personalized-badge/goldenverba?period=total&units=international_system&left_color=grey&right_color=orange&left_text=pip%20downloads)](https://pypi.org/project/goldenverba/) [![Docker support](https://img.shields.io/badge/Docker_support-%E2%9C%93-4c1?style=flat-square&logo=docker&logoColor=white)](https://docs.docker.com/get-started/) [![Demo](https://img.shields.io/badge/Check%20out%20the%20demo!-yellow?&style=flat-square&logo=react&logoColor=white)](https://verba.weaviate.io/) Welcome to Verba: The Golden RAGtriever, a community-driven open-source application designed to offer an end-to-end, streamlined, and user-friendly interface for Retrieval-Augmented Generation (RAG) out of the box. In just a few easy steps, explore your datasets and extract insights with ease, either locally with Ollama and HuggingFace or through LLM providers such as Anthropic, Cohere, and OpenAI. This project is built with and for the community; please be aware that it might not be maintained with the same urgency as other Weaviate production applications. Feel free to contribute to the project and help us make Verba even better! 
<3 ``` pip install goldenverba ``` ![Demo of Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/verba.gif) - [Verba](#verba) - [🎯 What Is Verba?](#what-is-verba) - [✨ Features](#feature-lists) - [✨ Getting Started with Verba](#getting-started-with-verba) - [🔑 API Keys](#api-keys) - [Weaviate](#weaviate) - [Ollama](#ollama) - [Unstructured](#unstructured) - [AssemblyAI](#assemblyai) - [OpenAI](#openai) - [HuggingFace](#huggingface) - [Groq](#groq) - [Novita AI](#novitaai) - [Quickstart: Deploy with pip](#how-to-deploy-with-pip) - [Quickstart: Build from Source](#how-to-build-from-source) - [Quickstart: Deploy with Docker](#how-to-install-verba-with-docker) - [💾 Verba Walkthrough](#️verba-walkthrough) - [💖 Open Source Contribution](#open-source-contribution) - [🚩 Known Issues](#known-issues) - [❔FAQ](#faq) ## What Is Verba? Verba is a fully-customizable personal assistant utilizing [Retrieval Augmented Generation (RAG)](https://weaviate.io/rag#:~:text=RAG%20with%20Weaviate,accuracy%20of%20AI%2Dgenerated%20content.) for querying and interacting with your data, **either locally or deployed via cloud**. Resolve questions around your documents, cross-reference multiple data points or gain insights from existing knowledge bases. Verba combines state-of-the-art RAG techniques with Weaviate's context-aware database. Choose between different RAG frameworks, data types, chunking & retrieving techniques, and LLM providers based on your individual use-case. ## Open Source Spirit **Weaviate** is proud to offer this open-source project for the community. While we strive to address issues as fast as we can, please understand that it may not be maintained with the same rigor as production software. We welcome and encourage community contributions to help keep it running smoothly. Your support in fixing open issues quickly is greatly appreciated. 
### Watch our newest Verba video here: [![VIDEO LINK](https://github.com/weaviate/Verba/blob/main/img/thumbnail.png)](https://www.youtube.com/watch?v=2VCy-YjRRhA&t=40s&ab_channel=Weaviate%E2%80%A2VectorDatabase) ## Feature Lists | 🤖 Model Support | Implemented | Description | | --------------------------------- | ----------- | ------------------------------------------------------- | | Ollama (e.g. Llama3) | ✅ | Local Embedding and Generation Models powered by Ollama | | HuggingFace (e.g. MiniLMEmbedder) | ✅ | Local Embedding Models powered by HuggingFace | | Cohere (e.g. Command R+) | ✅ | Embedding and Generation Models by Cohere | | Anthropic (e.g. Claude Sonnet) | ✅ | Embedding and Generation Models by Anthropic | | OpenAI (e.g. GPT4) | ✅ | Embedding and Generation Models by OpenAI | | Groq (e.g. Llama3) | ✅ | Generation Models by Groq (LPU inference) | | Novita AI (e.g. Llama3.3) | ✅ | Generation Models by Novita AI | | Upstage (e.g. Solar) | ✅ | Embedding and Generation Models by Upstage | | 🤖 Embedding Support | Implemented | Description | | -------------------- | ----------- | ---------------------------------------- | | Weaviate | ✅ | Embedding Models powered by Weaviate | | Ollama | ✅ | Local Embedding Models powered by Ollama | | SentenceTransformers | ✅ | Embedding Models powered by HuggingFace | | Cohere | ✅ | Embedding Models by Cohere | | VoyageAI | ✅ | Embedding Models by VoyageAI | | OpenAI | ✅ | Embedding Models by OpenAI | | Upstage | ✅ | Embedding Models by Upstage | | 📁 Data Support | Implemented | Description | | -------------------------------------------------------- | ----------- | ---------------------------------------------- | | [UnstructuredIO](https://docs.unstructured.io/welcome) | ✅ | Import Data through Unstructured | | [Firecrawl](https://www.firecrawl.dev/) | ✅ | Scrape and Crawl URL through Firecrawl | | [UpstageDocumentParse](https://upstage.ai/) | ✅ | Parse Documents through Upstage Document AI | | PDF Ingestion | ✅ | Import 
PDF into Verba | | GitHub & GitLab | ✅ | Import Files from Github and GitLab | | CSV/XLSX Ingestion | ✅ | Import Table Data into Verba | | .DOCX | ✅ | Import .docx files | | Multi-Modal (using [AssemblyAI](https://assemblyai.com)) | ✅ | Import and Transcribe Audio through AssemblyAI | | ✨ RAG Features | Implemented | Description | | ----------------------- | --------------- | ------------------------------------------------------------------------- | | Hybrid Search | ✅ | Semantic Search combined with Keyword Search | | Autocomplete Suggestion | ✅ | Verba suggests autocompletion | | Filtering | ✅ | Apply Filters (e.g. documents, document types etc.) before performing RAG | | Customizable Metadata | ✅ | Free control over Metadata | | Async Ingestion | ✅ | Ingest data asynchronously to speed up the process | | Advanced Querying | planned ⏱️ | Task Delegation Based on LLM Evaluation | | Reranking | planned ⏱️ | Rerank results based on context for improved results | | RAG Evaluation | planned ⏱️ | Interface for Evaluating RAG pipelines | | Agentic RAG | out of scope ❌ | Agentic RAG pipelines | | Graph RAG | out of scope ❌ | Graph-based RAG pipelines | | 🗡️ Chunking Techniques | Implemented | Description | | ---------------------- | ----------- | ------------------------------------------------------- | | Token | ✅ | Chunk by Token powered by [spaCy](https://spacy.io/) | | Sentence | ✅ | Chunk by Sentence powered by [spaCy](https://spacy.io/) | | Semantic | ✅ | Chunk and group by semantic sentence similarity | | Recursive | ✅ | Recursively chunk data based on rules | | HTML | ✅ | Chunk HTML files | | Markdown | ✅ | Chunk Markdown files | | Code | ✅ | Chunk Code files | | JSON | ✅ | Chunk JSON files | | 🆒 Cool Bonus | Implemented | Description | | ------------------------ | --------------- | ------------------------------------------------------- | | Docker Support | ✅ | Verba is deployable via Docker | | Customizable Frontend | ✅ | Verba's frontend is fully-customizable 
via the frontend | | Vector Viewer | ✅ | Visualize your data in 3D | | Multi-User Collaboration | out of scope ❌ | Multi-User Collaboration in Verba | | 🤝 RAG Libraries | Implemented | Description | | ---------------- | ----------- | ---------------------------------- | | LangChain | ✅ | Implement LangChain RAG pipelines | | Haystack | planned ⏱️ | Implement Haystack RAG pipelines | | LlamaIndex | planned ⏱️ | Implement LlamaIndex RAG pipelines | > Something is missing? Feel free to create a new issue or discussion with your idea! ![Showcase of Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/verba_screen.png) --- # Getting Started with Verba You have three deployment options for Verba: - Install via pip ``` pip install goldenverba ``` - Build from Source ``` git clone https://github.com/weaviate/Verba pip install -e . ``` - Use Docker for Deployment **Prerequisites**: If you're not using Docker, ensure that you have `Python >=3.10.0,<3.13.0` installed on your system. ``` git clone https://github.com/weaviate/Verba docker compose --env-file <your-env-file> up -d --build ``` If you're unfamiliar with Python and Virtual Environments, please read the [python tutorial guidelines](./PYTHON_TUTORIAL.md). # API Keys and Environment Variables You can set all API keys in the Verba frontend, but to make your life easier, you can also prepare a `.env` file in which Verba will automatically look for the keys. Create a `.env` in the same directory you want to start Verba in. You can find an `.env.example` file in the [goldenverba](./goldenverba/.env.example) directory. > Make sure to only set environment variables you intend to use; environment variables with missing or incorrect values may lead to errors. 
Below is a comprehensive list of the API keys and variables you may require: | Environment Variable | Value | Description | | ---------------------- | ---------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | | WEAVIATE_URL_VERBA | URL to your hosted Weaviate Cluster | Connect to your [WCS](https://console.weaviate.cloud/) Cluster | | WEAVIATE_API_KEY_VERBA | API Credentials to your hosted Weaviate Cluster | Connect to your [WCS](https://console.weaviate.cloud/) Cluster | | ANTHROPIC_API_KEY | Your Anthropic API Key | Get Access to [Anthropic](https://www.anthropic.com/) Models | | OPENAI_API_KEY | Your OpenAI Key | Get Access to [OpenAI](https://openai.com/) Models | | OPENAI_EMBED_API_KEY | Your OpenAI Key | Use a different endpoint for embeddings | | OPENAI_BASE_URL | URL to OpenAI instance | Models | | OPENAI_EMBED_BASE_URL | URL to OpenAI instance | Use a different endpoint for embeddings | | OPENAI_MODEL | The name of the model to be used when selecting OpenAI as a Generator | Default: the first model in the list returned by the endpoint | | OPENAI_EMBED_MODEL | The name of the OpenAI embedding model to be used when selecting OpenAI as an Embedder | Default: `text-embedding-3-small` | | OPENAI_CUSTOM_EMBED | `true` \| `false` | Allow Verba to recognize custom embedding model names (not only OpenAI ones) | | COHERE_API_KEY | Your API Key | Get Access to [Cohere](https://cohere.com/) Models | | GROQ_API_KEY | Your Groq API Key | Get Access to [Groq](https://groq.com/) Models | | NOVITA_API_KEY | Your Novita API Key | Get Access to [Novita AI](https://novita.ai?utm_source=github_verba&utm_medium=github_readme&utm_campaign=github_link) Models | | OLLAMA_URL | URL to your Ollama instance (e.g. 
http://localhost:11434 ) | Get Access to [Ollama](https://ollama.com/) Models | | UNSTRUCTURED_API_KEY | Your API Key | Get Access to [Unstructured](https://docs.unstructured.io/welcome) Data Ingestion | | UNSTRUCTURED_API_URL | URL to Unstructured Instance | Get Access to [Unstructured](https://docs.unstructured.io/welcome) Data Ingestion | | ASSEMBLYAI_API_KEY | Your API Key | Get Access to [AssemblyAI](https://assemblyai.com) Data Ingestion | | GITHUB_TOKEN | Your GitHub Token | Get Access to Data Ingestion via GitHub | | GITLAB_TOKEN | Your GitLab Token | Get Access to Data Ingestion via GitLab | | FIRECRAWL_API_KEY | Your Firecrawl API Key | Get Access to Data Ingestion via Firecrawl | | VOYAGE_API_KEY | Your VoyageAI API Key | Get Access to Embedding Models via VoyageAI | | EMBEDDING_SERVICE_URL | URL to your Embedding Service Instance | Get Access to Embedding Models via [Weaviate Embedding Service](https://weaviate.io/developers/wcs/embeddings) | | EMBEDDING_SERVICE_KEY | Your Embedding Service Key | Get Access to Embedding Models via [Weaviate Embedding Service](https://weaviate.io/developers/wcs/embeddings) | | UPSTAGE_API_KEY | Your Upstage API Key | Get Access to [Upstage](https://upstage.ai/) Models | | UPSTAGE_BASE_URL | URL to Upstage instance | Models | | DEFAULT_DEPLOYMENT | Local, Weaviate, Custom, Docker | Set the default deployment mode | | SYSYEM_MESSAGE_PROMPT | Prompt text value | Default value starts with: "You are Verba, a chatbot for..." | | OLLAMA_MODEL | Your Ollama Model | Set the default Ollama model to use | | OLLAMA_EMBED_MODEL | Your Ollama Embedding Model | Set the default Ollama embedding model to use | ![API Keys in Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/api_screen.png) ## Weaviate Verba provides flexibility in connecting to Weaviate instances based on your needs. You have three options: 1. 
**Local Deployment**: Use Weaviate Embedded which runs locally on your device (except Windows, choose the Docker/Cloud Deployment) 2. **Docker Deployment**: Choose this option when you're running Verba's Dockerfile. 3. **Cloud Deployment**: Use an existing Weaviate instance hosted on WCD to run Verba **💻 Weaviate Embedded** Embedded Weaviate is a deployment model that runs a Weaviate instance from your application code rather than from a stand-alone Weaviate server installation. When you run Verba in `Local Deployment`, it will set up and manage Embedded Weaviate in the background. Please note that Weaviate Embedded is not supported on Windows and is in Experimental Mode which can bring unexpected errors. We recommend using the Docker Deployment or Cloud Deployment instead. You can read more about Weaviate Embedded [here](https://weaviate.io/developers/weaviate/installation/embedded). **🌩️ Weaviate Cloud Deployment (WCD)** If you prefer a cloud-based solution, Weaviate Cloud (WCD) offers a scalable, managed environment. Learn how to set up a cloud cluster and get the API keys by following the [Weaviate Cluster Setup Guide](https://weaviate.io/developers/wcs/guides/create-instance). **🐳 Docker Deployment** Another local alternative is deploying Weaviate using Docker. For more details, follow the [How to install Verba with Docker](#how-to-install-verba-with-docker) section. ![Deployment in Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/verba_deployment.png) **⚙️ Custom Weaviate Deployment** If you're hosting Weaviate yourself, you can use the `Custom` deployment option in Verba. This will allow you to specify the URL, PORT, and API key of your Weaviate instance. ## Ollama Verba supports Ollama models. Download and Install Ollama on your device (https://ollama.com/download). Make sure to install your preferred LLM using `ollama run <model>`. Tested with `llama3`, `llama3:70b` and `mistral`. 
The bigger models generally perform better, but need more computational power. > Make sure the Ollama Server runs in the background and that you don't ingest documents with different Ollama models: their vector dimensions can vary, which will lead to errors. You can verify that the server is running with the following command ``` ollama run llama3 ``` ## Unstructured Verba supports importing documents through Unstructured IO (e.g. plain text, .pdf, .csv, and more). To use them you need the `UNSTRUCTURED_API_KEY` and `UNSTRUCTURED_API_URL` environment variables. You can get them from [Unstructured](https://unstructured.io/) > UNSTRUCTURED_API_URL is set to `https://api.unstructuredapp.io/general/v0/general` by default ## AssemblyAI Verba supports importing documents through AssemblyAI (audio files or audio from video files). To use them you need the `ASSEMBLYAI_API_KEY` environment variable. You can get it from [AssemblyAI](https://assemblyai.com) ## OpenAI Verba supports OpenAI Models such as Ada, GPT3, and GPT4. To use them, you need to specify the `OPENAI_API_KEY` environment variable. You can get it from [OpenAI](https://openai.com/) You can also add an `OPENAI_BASE_URL` to use proxies such as LiteLLM (https://github.com/BerriAI/litellm) ``` OPENAI_BASE_URL=YOUR-OPENAI_BASE_URL ``` ### OpenAI Embeddings To specify a different endpoint for your embeddings, set the `OPENAI_EMBED_API_KEY` and `OPENAI_EMBED_BASE_URL` environment variables. If you are using a custom OpenAI Server for embeddings, ensure you set `OPENAI_CUSTOM_EMBED=true`. This will allow Verba to recognize custom embedding model names instead of the default OpenAI embedding model names. ## HuggingFace If you want to use the HuggingFace Features, make sure to install the correct Verba package. It will install required packages to use the local embedding models. Please note that on startup, Verba will automatically download and install embedding models when used. 
```bash pip install goldenverba[huggingface] or pip install `.[huggingface]` ``` > If you're using Docker, modify the `Dockerfile` accordingly. It's not possible to install a custom Verba installation if you pull the Docker Image from the Docker Hub, as of now, you'd need to install the Docker deployment from the source code and modify the `Dockerfile` beforehand. ## Groq To use Groq LPUs as generation engine, you need to get an API key from [Groq](https://console.groq.com/keys). > Although you can provide it in the graphical interface when Verba is up, it is recommended to specify it as `GROQ_API_KEY` environment variable before you launch the application. > It will allow you to choose the generation model in an up-to-date available models list. ## Novita To use Novita AI as generation engine, you need to get an API key from [Novita AI](https://novita.ai/settings/key-management?utm_source=github_verba&utm_medium=github_readme&utm_campaign=github_link). # How to deploy with pip `Python >=3.10.0` 1. (Very Important) **Initialize a new Python Environment** ``` python3 -m virtualenv venv source venv/bin/activate ``` 2. **Install Verba** ``` pip install goldenverba ``` 3. **Launch Verba** ``` verba start ``` > You can specify the --port and --host via flags 4. **Access Verba** ``` Visit localhost:8000 ``` 5. (Optional)**Create .env file and add environment variables** # How to build from Source 1. **Clone the Verba repos** ``` git clone https://github.com/weaviate/Verba.git ``` 2. **Initialize a new Python Environment** ``` python3 -m virtualenv venv source venv/bin/activate ``` 3. **Install Verba** ``` pip install -e . ``` 4. **Launch Verba** ``` verba start ``` > You can specify the --port and --host via flags 5. **Access Verba** ``` Visit localhost:8000 ``` 6. 
(Optional) **Create .env file and add environment variables** # How to install Verba with Docker Docker is a set of platform-as-a-service products that use OS-level virtualization to deliver software in packages called containers. To get started with deploying Verba using Docker, follow the steps below. If you need more detailed instructions on Docker usage, check out the [Docker Curriculum](https://docker-curriculum.com/). You can use `docker pull semitechnologies/verba` to pull the latest Verba Docker Image. Please note that by pulling directly from Docker Hub you're only able to install the vanilla Verba version that does not include packages, e.g. `HuggingFace`. If you want to use Docker and `HuggingFace` please follow the steps below. To build the image yourself, you can clone the Verba repository and run `docker build -t verba .` inside the Verba directory. 0. **Clone the Verba repo** Ensure you have Git installed on your system. Then, open a terminal or command prompt and run the following command to clone the Verba repository: ``` git clone https://github.com/weaviate/Verba.git ``` 1. **Set necessary environment variables** Make sure to set your required environment variables in the `.env` file. You can read more about how to set them up in the [API Keys Section](#api-keys) 2. **Adjust the docker-compose file** You can use the `docker-compose.yml` to add required environment variables under the `verba` service and can also adjust the Weaviate Docker settings to enable Authentication or change other settings of your database instance. You can read more about the Weaviate configuration in our [docker-compose documentation](https://weaviate.io/developers/weaviate/installation/docker-compose). You can also uncomment the `ollama` service to use Ollama within the same docker compose. > Please make sure to only add environment variables that you really need. 3. 
**Deploy using Docker** With Docker installed and the Verba repository cloned, navigate to the directory containing the Docker Compose file in your terminal or command prompt. Run the following command to start the Verba application in detached mode, which allows it to run in the background: ```bash docker compose up -d ``` ```bash docker compose --env-file goldenverba/.env up -d --build ``` This command will download the necessary Docker images, create containers, and start Verba. Remember, Docker must be installed on your system to use this method. For installation instructions and more details about Docker, visit the official Docker documentation. 4. **Access Verba** - You can access your local Weaviate instance at `localhost:8080` - You can access the Verba frontend at `localhost:8000` If you want your Docker Instance to install a specific version of Verba you can edit the `Dockerfile` and change the installation line. ``` RUN pip install -e '.' ``` ## Verba Walkthrough ### Select your Deployment The first screen you'll see is the deployment screen. Here you can select between `Local`, `Docker`, `Weaviate Cloud`, or `Custom` deployment. The `Local` deployment is using Weaviate Embedded under the hood, which initializes a Weaviate instance behind the scenes. The `Docker` deployment is using a separate Weaviate instance that is running inside the same Docker network. The `Weaviate Cloud` deployment is using a Weaviate instance that is hosted on Weaviate Cloud Services (WCS). The `Custom` deployment allows you to specify your own Weaviate instance URL, PORT, and API key. You can skip this part by setting the `DEFAULT_DEPLOYMENT` environment variable to `Local`, `Docker`, `Weaviate`, or `Custom`. ### Import Your Data First thing you need to do is to add your data. You can do this by clicking on `Import Data` and selecting either `Add Files`, `Add Directory`, or `Add URL` tab. Here you can add all your files that you want to ingest. 
You can then configure every file individually by selecting the file and clicking on `Overview` or `Configure` tab. ![Demo of Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/verba_data.png) ### Query Your Data With Data imported, you can use the `Chat` page to ask any related questions. You will receive relevant chunks that are semantically relevant to your question and an answer generated by your chosen model. You can configure the RAG pipeline under the `Config` tab. ![Demo of Verba](https://github.com/weaviate/Verba/blob/2.0.0/img/verba_rag.png) ## Open Source Contribution Your contributions are always welcome! Feel free to contribute ideas, feedback, or create issues and bug reports if you find any! Before contributing, please read the [Contribution Guide](./CONTRIBUTING.md). Visit our [Weaviate Community Forum](https://forum.weaviate.io/) if you need any help! ### Project Architecture You can learn more about Verba's architecture and implementation in its [technical documentation](./TECHNICAL.md) and [frontend documentation](./FRONTEND.md). It's recommended to have a look at them before making any contributions. ## Known Issues - **Weaviate Embedded** is currently not working on Windows - Will be fixed in future versions, until then please use the Docker or WCS Deployment ## FAQ - **Can I use pre-existing data from my Weaviate instance?** - No, unfortunately not. Verba requires the data to be in a specific format to work. And as of now, this is only possible by importing data through the Verba UI. - **Is Verba Multi-Lingual?** - This depends on whether your chosen Embedding and Generation Models support multi-lingual data. - **Can I use my Ollama Server with the Verba Docker?** - Yes, you can! 
Make sure the URL is set to: `OLLAMA_URL=http://host.docker.internal:11434` - If you're running on Linux, you might need to get the IP Gateway of the Ollama server: `OLLAMA_URL="http://YOUR-IP-OF-OLLAMA:11434"` - **How to clear Weaviate Embedded Storage?** - You'll find the stored data here: `~/.local/share/weaviate` - **How can I specify the port?** - You can use the port and host flag `verba start --port 9000 --host 0.0.0.0` - **Can multiple users use Verba at the same time? How about role based access?** - Verba is designed and optimized for single user usage only. There are no plans on supporting multiple users or role based access in the near future. - **Does Verba offer an API endpoint to use externally?** - No, right now Verba does not offer any useful API endpoints to interact with the application. The current FastAPI setup is optimized for the internal communication between the frontend and backend. It is not recommended to use it as an API endpoint. There are plans to add user-friendly API endpoints in the future. - **How to connect to your custom OpenAI Server?** - Set your custom OpenAI API Key and URL in the `.env` file, this will allow Verba to start up and retrieve the models from your custom OpenAI Server. `OPENAI_BASE_URL` is set to `https://api.openai.com/v1` by default. - You can also set a different endpoint for your embeddings by configuring the `OPENAI_EMBED_API_KEY` and `OPENAI_EMBED_BASE_URL` environment variables and setting `OPENAI_CUSTOM_EMBED=true`. For more details, see [OpenAI Embeddings](#openai-embeddings). - **How to upload custom JSON files to Verba?** - Right now Verba does not support custom JSON structure. Instead the whole JSON will simply be dumped into the content field of the Verba document. You can read more about the Verba JSON Structure in the Technical Documentation [here](./TECHNICAL.md). 
================================================ FILE: TECHNICAL.md ================================================ # Verba - Technical Documentation This technical documentation is intended for developers who want to understand the inner workings of Verba. Please note that this document might be incomplete and missing some parts. If you encounter any issues or have questions, please feel free to open an issue. ## FastAPI Server Verba is served through a FastAPI server. The server serves the static frontend files through the specified port. If you're modifying the frontend, you will need to rebuild the static files again. The frontend sends API calls to itself which the FastAPI server handles. The server can handle multiple client connections which are handled by the `ClientManager` class. ### ClientManager `TODO` For handling large file uploads, the `BatchManager` class handles batches of data of a single file to merge it into a single file once all batches have been received. ### BatchManager `TODO` ### Websocket `TODO` ## Automated Testing `TODO` ## FAQ ### How to control the position of context sent to the Generator to generate a response? Every `generator` class has a `prepare_messages` method. This method is used to format the messages that are sent to the LLM. The position of the context in the messages is important because it determines where the context is placed in the conversation. ### How to upload a JSON file to Verba? ## Verba JSON Structure A Verba Document can be created from a JSON object. The JSON object is converted to a Verba Document object and then uploaded to the vector database. 
Here's the general structure of a Verba Document (you can also find the implementation in the `Document.py` file): ```python { "title": "string", # The title of the document "content": "string", # The content of the document "extension": "string", # The extension of the document (Optional) "fileSize": "number", # The size of the document in bytes (Optional) "labels": "array", # The labels of the document (can be empty, used for filtering) "source": "string", # The source of the document (can be an URL, optional) "meta": "object", # The meta data of the document used internally "metadata": "string" # Metadata information of the document, will be used in the embedding process } ``` ## Custom JSON Structure There is currently no support for custom JSON structure. Instead the whole JSON will simply be dumped into the content field of the Verba document. There are plans to add support for custom JSON structure in the future. ================================================ FILE: docker-compose.yml ================================================ --- services: verba: build: context: ./ dockerfile: Dockerfile ports: - 8000:8000 environment: - WEAVIATE_URL_VERBA=http://weaviate:8080 - OPENAI_API_KEY=$OPENAI_API_KEY - COHERE_API_KEY=$COHERE_API_KEY - OLLAMA_URL=http://host.docker.internal:11434 - OLLAMA_MODEL=$OLLAMA_MODEL - OLLAMA_EMBED_MODEL=$OLLAMA_EMBED_MODEL - UNSTRUCTURED_API_KEY=$UNSTRUCTURED_API_KEY - UNSTRUCTURED_API_URL=$UNSTRUCTURED_API_URL - GITHUB_TOKEN=$GITHUB_TOKEN volumes: - ./data:/data/ depends_on: weaviate: condition: service_healthy healthcheck: test: wget --no-verbose --tries=3 --spider http://localhost:8000 || exit 1 interval: 5s timeout: 10s retries: 5 start_period: 10s networks: - ollama-docker weaviate: command: - --host - 0.0.0.0 - --port - '8080' - --scheme - http image: semitechnologies/weaviate:1.25.10 ports: - 8080:8080 - 3000:8080 volumes: - weaviate_data:/var/lib/weaviate restart: on-failure:0 healthcheck: test: wget --no-verbose --tries=3 
--spider http://localhost:8080/v1/.well-known/ready || exit 1 interval: 5s timeout: 10s retries: 5 start_period: 10s environment: OPENAI_APIKEY: $OPENAI_API_KEY COHERE_APIKEY: $COHERE_API_KEY QUERY_DEFAULTS_LIMIT: 25 AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' PERSISTENCE_DATA_PATH: '/var/lib/weaviate' ENABLE_MODULES: 'e' CLUSTER_HOSTNAME: 'node1' networks: - ollama-docker # Uncomment to use Ollama within the same docker compose # ollama: # image: ollama/ollama:latest # ports: # - 7869:11434 # volumes: # - .:/code # - ./ollama/ollama:/root/.ollama # container_name: ollama # pull_policy: always # tty: true # restart: always # environment: # - OLLAMA_KEEP_ALIVE=24h # - OLLAMA_HOST=0.0.0.0 # networks: # - ollama-docker volumes: weaviate_data: {} networks: ollama-docker: external: false ... ================================================ FILE: frontend/.eslintrc.json ================================================ { "extends": "next/core-web-vitals" } ================================================ FILE: frontend/.gitignore ================================================ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
# dependencies /node_modules /.pnp .pnp.js # testing /coverage # next.js /.next/ /out/ # production /build # misc .DS_Store *.pem # debug npm-debug.log* yarn-debug.log* yarn-error.log* # local env files .env*.local # vercel .vercel # typescript *.tsbuildinfo next-env.d.ts ================================================ FILE: frontend/app/api.ts ================================================
import {
  ConnectPayload,
  HealthPayload,
  RAGConfig,
  QueryPayload,
  Credentials,
  DocumentsPreviewPayload,
  DocumentPayload,
  ChunkScore,
  ContentPayload,
  ChunksPayload,
  RAGConfigResponse,
  AllSuggestionsPayload,
  MetadataPayload,
  DatacountResponse,
  SuggestionsPayload,
  ChunkPayload,
  DocumentFilter,
  VectorsPayload,
  UserConfigResponse,
  ThemeConfigResponse,
  Theme,
  UserConfig,
  LabelsResponse,
  Themes,
} from "./types";

/**
 * Issues a GET request to `url` and reports whether the response status was OK.
 * A rejected fetch is logged and reported as `false` instead of being thrown.
 */
const checkUrl = async (url: string): Promise<boolean> => {
  try {
    const response = await fetch(url);
    return response.ok;
  } catch (error) {
    console.error(`Failed to fetch from ${url}:`, error);
    return false;
  }
};

/**
 * Resolves the base URL that API requests should be sent to.
 *
 * The health endpoint on `http://localhost:8000` is probed first; if it does
 * not answer OK, the same-origin `/api/health` endpoint is probed and the
 * current `window.location.origin` is returned.
 *
 * @returns The base URL (without a trailing slash) of the reachable server.
 * @throws Error when neither health check succeeds.
 */
export const detectHost = async (): Promise<string> => {
  const localUrl = "http://localhost:8000/api/health";
  const rootUrl = "/api/health";

  const isLocalHealthy = await checkUrl(localUrl);
  if (isLocalHealthy) {
    return "http://localhost:8000";
  }

  const isRootHealthy = await checkUrl(rootUrl);
  if (isRootHealthy) {
    const root = window.location.origin;
    return root;
  }

  throw new Error("Both health checks failed, please check the Verba Server");
};

/**
 * Sends `payload` as a JSON-encoded POST body to `path` on the detected host.
 *
 * Private helper shared by the POST endpoints below. It neither inspects the
 * response nor catches errors, so a failing `detectHost()` or `fetch()`
 * rejects the returned promise and is handled by the caller.
 */
const postJSON = async (path: string, payload: unknown): Promise<Response> => {
  const host = await detectHost();
  return fetch(`${host}${path}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
};

/**
 * Performs a GET request against `endpoint` on the detected host and returns
 * the parsed JSON body.
 *
 * @param endpoint Path starting with `/`, appended to the detected host.
 * @returns The parsed body, or `null` when host detection, the request, or
 *          JSON parsing fails (the error is logged). A falsy body only
 *          triggers a warning and is returned as-is.
 */
export const fetchData = async <T>(endpoint: string): Promise<T | null> => {
  try {
    const host = await detectHost();
    const response = await fetch(`${host}${endpoint}`, { method: "GET" });
    const data = await response.json();

    if (!data) {
      console.warn(`Could not retrieve data from ${endpoint}`);
    }

    return data;
  } catch (error) {
    console.error(`Failed to fetch data from ${endpoint}:`, error);
    return null;
  }
};

// Endpoint /api/health
export const fetchHealth = (): Promise<HealthPayload | null> =>
  fetchData<HealthPayload>("/api/health");

// Endpoint /api/connect
/**
 * Posts the connection credentials to the backend and returns its JSON answer.
 *
 * Unlike the other endpoints in this file there is no try/catch here: a
 * failing host detection, request, or JSON parse rejects the returned promise.
 */
export const connectToVerba = async (
  deployment: string,
  url: string,
  apiKey: string,
  port: string
): Promise<ConnectPayload> => {
  const response = await postJSON("/api/connect", {
    credentials: {
      deployment: deployment,
      url: url,
      key: apiKey,
    },
    port: port,
  });
  const data = await response.json();
  return data;
};

// Endpoint /api/get_rag_config
/** Fetches the RAG configuration; resolves to `null` (after logging) on any error. */
export const fetchRAGConfig = async (
  credentials: Credentials
): Promise<RAGConfigResponse | null> => {
  try {
    const response = await postJSON("/api/get_rag_config", credentials);
    const data: RAGConfigResponse = await response.json();
    return data;
  } catch (error) {
    console.error("Error retrieving content", error);
    return null;
  }
};

// Endpoint /api/set_rag_config
/**
 * Stores the RAG configuration.
 *
 * @returns `true` only when the server answers with HTTP 200; `false` when
 *          `RAG` is null (no request is sent), on any other status, or on error.
 */
export const updateRAGConfig = async (
  RAG: RAGConfig | null,
  credentials: Credentials
): Promise<boolean> => {
  if (!RAG) {
    return false;
  }

  try {
    const response = await postJSON("/api/set_rag_config", {
      rag_config: RAG,
      credentials: credentials,
    });
    return response.status === 200;
  } catch (error) {
    console.error("Error setting config:", error);
    return false;
  }
};

// Endpoint /api/get_user_config
/** Fetches the user configuration; resolves to `null` (after logging) on any error. */
export const fetchUserConfig = async (
  credentials: Credentials
): Promise<UserConfigResponse | null> => {
  try {
    const response = await postJSON("/api/get_user_config", credentials);
    const data: UserConfigResponse = await response.json();
    return data;
  } catch (error) {
    console.error("Error retrieving content", error);
    return null;
  }
};

// Endpoint /api/set_user_config
/** Stores the user configuration; `true` only when the server answers with HTTP 200. */
export const updateUserConfig = async (
  user_config: UserConfig,
  credentials: Credentials
): Promise<boolean> => {
  try {
    const response = await postJSON("/api/set_user_config", {
      user_config: user_config,
      credentials: credentials,
    });
    return response.status === 200;
  } catch (error) {
    console.error("Error setting config:", error);
    return false;
  }
};

// Endpoint /api/get_theme_config
/** Fetches the theme configuration; resolves to `null` (after logging) on any error. */
export const fetchThemeConfig = async (
  credentials: Credentials
): Promise<ThemeConfigResponse | null> => {
  try {
    const response = await postJSON("/api/get_theme_config", credentials);
    const data: ThemeConfigResponse = await response.json();
    return data;
  } catch (error) {
    console.error("Error retrieving content", error);
    return null;
  }
};

// Endpoint /api/set_theme_config
/** Stores all themes plus the selected theme; `true` only when the server answers with HTTP 200. */
export const updateThemeConfig = async (
  themes: Themes,
  theme: Theme,
  credentials: Credentials
): Promise<boolean> => {
  try {
    const response = await postJSON("/api/set_theme_config", {
      themes: themes,
      theme: theme,
      credentials: credentials,
    });
    return response.status === 200;
  } catch (error) {
    console.error("Error setting config:", error);
    return false;
  }
};

// Endpoint /api/query
/**
 * Sends a user query together with the RAG configuration and the active
 * label/document filters; resolves to `null` (after logging) on any error.
 */
export const sendUserQuery = async (
  query: string,
  RAG: RAGConfig | null,
  labels: string[],
  documentFilter: DocumentFilter[],
  credentials: Credentials
): Promise<QueryPayload | null> => {
  try {
    const response = await postJSON("/api/query", {
      query: query,
      RAG: RAG,
      labels: labels,
      documentFilter: documentFilter,
      credentials: credentials,
    });
    const data: QueryPayload = await response.json();
    return data;
  } catch (error) {
    console.error("Error sending query", error);
    return null;
  }
};

// Endpoint
/api/get_document export const fetchSelectedDocument = async ( uuid: string | null, credentials: Credentials ): Promise => { if (!uuid) { return null; } try { const host = await detectHost(); const response = await fetch(`${host}/api/get_document`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, credentials: credentials, }), }); const data: DocumentPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving selected document", error); return null; } }; // Endpoint /api/get_datacount export const fetchDatacount = async ( embedding_model: string, documentFilter: DocumentFilter[], credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_datacount`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ embedding_model: embedding_model, documentFilter: documentFilter, credentials: credentials, }), }); const data: DatacountResponse = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // Endpoint /api/get_labels export const fetchLabels = async ( credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_labels`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(credentials), }); const data: LabelsResponse = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // Endpoint /api/get_content export const fetchContent = async ( uuid: string | null, page: number, chunkScores: ChunkScore[], credentials: Credentials ): Promise => { if (!uuid) { return null; } try { const host = await detectHost(); const response = await fetch(`${host}/api/get_content`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ 
uuid: uuid, page: page, chunkScores: chunkScores, credentials: credentials, }), }); const data: ContentPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // Endpoint /api/get_vectors export const fetch_vectors = async ( uuid: string | null, showAll: boolean, credentials: Credentials ): Promise => { if (!uuid) { return null; } try { const host = await detectHost(); const response = await fetch(`${host}/api/get_vectors`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, showAll: showAll, credentials: credentials, }), }); const data: VectorsPayload | null = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // Endpoint /api/get_chunks export const fetch_chunks = async ( uuid: string | null, page: number, pageSize: number, credentials: Credentials ): Promise => { if (!uuid) { return null; } try { const host = await detectHost(); const response = await fetch(`${host}/api/get_chunks`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, page: page, pageSize: pageSize, credentials: credentials, }), }); const data: ChunksPayload | null = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // Endpoint /api/get_chunk export const fetch_chunk = async ( uuid: string | null, embedder: string, credentials: Credentials ): Promise => { if (!uuid) { return null; } try { const host = await detectHost(); const response = await fetch(`${host}/api/get_chunk`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, embedder: embedder, credentials: credentials, }), }); const data: ChunkPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving content", error); return null; } }; // 
Endpoint /api/get_all_documents export const retrieveAllDocuments = async ( query: string, labels: string[], page: number, pageSize: number, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_all_documents`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ query: query, labels: labels, page: page, pageSize: pageSize, credentials: credentials, }), }); const data: DocumentsPreviewPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving all documents", error); return null; } }; // Endpoint /api/delete_document export const deleteDocument = async ( uuid: string, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/delete_document`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, credentials: credentials, }), }); return response.status === 200; } catch (error) { console.error("Error deleting document", error); return false; } }; // Endpoint /api/reset export const deleteAllDocuments = async ( resetMode: string, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/reset`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ resetMode: resetMode, credentials: credentials, }), }); return response.status === 200; } catch (error) { console.error("Error deleting all documents", error); return false; } }; // Endpoint /api/get_meta export const fetchMeta = async ( credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_meta`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(credentials), }); const data: MetadataPayload = await response.json(); return data; } catch (error) { 
console.error("Error retrieving selected document", error); return null; } }; // Endpoint /api/get_suggestions export const fetchSuggestions = async ( query: string, limit: number, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_suggestions`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ query: query, limit: limit, credentials: credentials, }), }); const data: SuggestionsPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving suggestions", error); return null; } }; // Endpoint /api/delete_suggestion export const deleteSuggestion = async ( uuid: string, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/delete_suggestion`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ uuid: uuid, credentials: credentials, }), }); return response.status === 200; } catch (error) { console.error("Error deleting suggestion", error); return false; } }; // Endpoint /api/get_all_suggestions export const fetchAllSuggestions = async ( page: number, pageSize: number, credentials: Credentials ): Promise => { try { const host = await detectHost(); const response = await fetch(`${host}/api/get_all_suggestions`, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ page: page, pageSize: pageSize, credentials: credentials, }), }); const data: AllSuggestionsPayload = await response.json(); return data; } catch (error) { console.error("Error retrieving all suggestions", error); return null; } }; ================================================ FILE: frontend/app/components/Chat/ChatConfig.tsx ================================================ "use client"; import React, { useCallback } from "react"; import { MdCancel } from "react-icons/md"; import { IoSettingsSharp } from "react-icons/io5"; 
import { RAGConfig, RAGComponentConfig, Credentials } from "@/app/types"; import { updateRAGConfig } from "@/app/api"; import ComponentView from "../Ingestion/ComponentView"; import VerbaButton from "../Navigation/VerbaButton"; interface ChatConfigProps { RAGConfig: RAGConfig | null; setRAGConfig: React.Dispatch>; onSave: () => void; // New parameter for handling save onReset: () => void; // New parameter for handling reset addStatusMessage: ( message: string, type: "INFO" | "WARNING" | "SUCCESS" | "ERROR" ) => void; credentials: Credentials; production: "Local" | "Demo" | "Production"; } const ChatConfig: React.FC = ({ RAGConfig, setRAGConfig, addStatusMessage, onSave, credentials, onReset, production, }) => { const updateConfig = ( component_n: string, configTitle: string, value: string | boolean | string[] ) => { setRAGConfig((prevRAGConfig) => { if (prevRAGConfig) { const newRAGConfig = { ...prevRAGConfig }; if (typeof value === "string" || typeof value === "boolean") { newRAGConfig[component_n].components[ newRAGConfig[component_n].selected ].config[configTitle].value = value; } else { newRAGConfig[component_n].components[ newRAGConfig[component_n].selected ].config[configTitle].values = value; } return newRAGConfig; } return prevRAGConfig; }); }; const selectComponent = (component_n: string, selected_component: string) => { setRAGConfig((prevRAGConfig) => { if (prevRAGConfig) { const newRAGConfig = { ...prevRAGConfig }; newRAGConfig[component_n].selected = selected_component; return newRAGConfig; } return prevRAGConfig; }); }; const saveComponentConfig = useCallback( async ( component_n: string, selected_component: string, component_config: RAGComponentConfig ) => { if (!RAGConfig) return; addStatusMessage("Saving " + selected_component + " Config", "SUCCESS"); const newRAGConfig = JSON.parse(JSON.stringify(RAGConfig)); newRAGConfig[component_n].selected = selected_component; newRAGConfig[component_n].components[selected_component] = component_config; const 
response = await updateRAGConfig(newRAGConfig, credentials); if (response) { setRAGConfig(newRAGConfig); } }, [RAGConfig, credentials] ); if (RAGConfig) { return (
{/* Add Save and Reset buttons */}
); } else { return
; } }; export default ChatConfig; ================================================ FILE: frontend/app/components/Chat/ChatInterface.tsx ================================================ "use client"; import React, { useState, useEffect, useRef } from "react"; import { MdCancel, MdOutlineRefresh } from "react-icons/md"; import { TbPlugConnected } from "react-icons/tb"; import { IoChatbubbleSharp } from "react-icons/io5"; import { FaHammer } from "react-icons/fa"; import { IoIosSend } from "react-icons/io"; import { BiError } from "react-icons/bi"; import { IoMdAddCircle } from "react-icons/io"; import VerbaButton from "../Navigation/VerbaButton"; import { updateRAGConfig, sendUserQuery, fetchDatacount, fetchRAGConfig, fetchSuggestions, fetchLabels, } from "@/app/api"; import { getWebSocketApiHost } from "@/app/util"; import { Credentials, QueryPayload, Suggestion, DataCountPayload, ChunkScore, Message, LabelsResponse, RAGConfig, Theme, DocumentFilter, } from "@/app/types"; import InfoComponent from "../Navigation/InfoComponent"; import ChatConfig from "./ChatConfig"; import ChatMessage from "./ChatMessage"; interface ChatInterfaceProps { credentials: Credentials; setSelectedDocument: (s: string | null) => void; setSelectedChunkScore: (c: ChunkScore[]) => void; currentPage: string; RAGConfig: RAGConfig | null; setRAGConfig: React.Dispatch>; selectedTheme: Theme; production: "Local" | "Demo" | "Production"; addStatusMessage: ( message: string, type: "INFO" | "WARNING" | "SUCCESS" | "ERROR" ) => void; documentFilter: DocumentFilter[]; setDocumentFilter: React.Dispatch>; } const ChatInterface: React.FC = ({ production, credentials, setSelectedDocument, setSelectedChunkScore, currentPage, RAGConfig, selectedTheme, setRAGConfig, addStatusMessage, documentFilter, setDocumentFilter, }) => { const [selectedSetting, setSelectedSetting] = useState("Chat"); const isFetching = useRef(false); const [fetchingStatus, setFetchingStatus] = useState< "DONE" | "CHUNKS" | "RESPONSE" 
>("DONE"); const [previewText, setPreviewText] = useState(""); const [socket, setSocket] = useState(null); const [socketOnline, setSocketOnline] = useState(false); const [reconnect, setReconnect] = useState(false); const [currentSuggestions, setCurrentSuggestions] = useState( [] ); const [labels, setLabels] = useState([]); const [filterLabels, setFilterLabels] = useState([]); const [selectedDocumentScore, setSelectedDocumentScore] = useState< string | null >(null); const [currentDatacount, setCurrentDatacount] = useState(0); const [userInput, setUserInput] = useState(""); const [messages, setMessages] = useState([]); const [isComposing, setIsComposing] = useState(false); const currentEmbedding = RAGConfig ? (RAGConfig["Embedder"].components[RAGConfig["Embedder"].selected].config[ "Model" ].value as string) : "No Config found"; useState("No Embedding Model"); useEffect(() => { setReconnect(true); }, []); useEffect(() => { if (RAGConfig) { retrieveDatacount(); } else { setCurrentDatacount(0); } }, [currentEmbedding, currentPage, documentFilter]); useEffect(() => { setMessages((prev) => { if (prev.length === 0) { return [ { type: "system", content: selectedTheme.intro_message.text, }, ]; } return prev; }); }, [selectedTheme.intro_message.text]); // Setup WebSocket and messages to /ws/generate_stream useEffect(() => { const socketHost = getWebSocketApiHost(); const localSocket = new WebSocket(socketHost); localSocket.onopen = () => { console.log("WebSocket connection opened to " + socketHost); setSocketOnline(true); }; localSocket.onmessage = (event) => { let data; if (!isFetching.current) { setPreviewText(""); return; } try { data = JSON.parse(event.data); } catch (e) { console.error("Received data is not valid JSON:", event.data); return; // Exit early if data isn't valid JSON } const newMessageContent = data.message; setPreviewText((prev) => prev + newMessageContent); if (data.finish_reason === "stop") { isFetching.current = false; setFetchingStatus("DONE"); 
addStatusMessage("Finished generation", "SUCCESS"); const full_text = data.full_text; if (data.cached) { const distance = data.distance; setMessages((prev) => [ ...prev, { type: "system", content: full_text, cached: true, distance: distance, }, ]); } else { setMessages((prev) => [ ...prev, { type: "system", content: full_text }, ]); } setPreviewText(""); } }; localSocket.onerror = (error) => { console.error("WebSocket Error:", error); setSocketOnline(false); isFetching.current = false; setFetchingStatus("DONE"); setReconnect((prev) => !prev); }; localSocket.onclose = (event) => { if (event.wasClean) { console.log( `WebSocket connection closed cleanly, code=${event.code}, reason=${event.reason}` ); } else { console.error("WebSocket connection died"); } setSocketOnline(false); isFetching.current = false; setFetchingStatus("DONE"); setReconnect((prev) => !prev); }; setSocket(localSocket); return () => { if (localSocket.readyState !== WebSocket.CLOSED) { localSocket.close(); } }; }, [reconnect]); useEffect(() => { if (RAGConfig) { retrieveDatacount(); } else { setCurrentDatacount(0); } }, [RAGConfig]); const retrieveRAGConfig = async () => { const config = await fetchRAGConfig(credentials); if (config) { setRAGConfig(config.rag_config); } else { addStatusMessage("Failed to fetch RAG Config", "ERROR"); } }; const sendUserMessage = async () => { if (isFetching.current || !userInput.trim()) return; const sendInput = userInput; setUserInput(""); isFetching.current = true; setCurrentSuggestions([]); setFetchingStatus("CHUNKS"); setMessages((prev) => [...prev, { type: "user", content: sendInput }]); try { addStatusMessage("Sending query...", "INFO"); const data = await sendUserQuery( sendInput, RAGConfig, filterLabels, documentFilter, credentials ); if (!data || data.error) { handleErrorResponse(data ? 
data.error : "No data received"); } else { handleSuccessResponse(data, sendInput); } } catch (error) { handleErrorResponse("Failed to fetch from API"); console.error("Failed to fetch from API:", error); } }; const handleErrorResponse = (errorMessage: string) => { addStatusMessage("Query failed", "ERROR"); setMessages((prev) => [...prev, { type: "error", content: errorMessage }]); isFetching.current = false; setFetchingStatus("DONE"); }; const handleSuccessResponse = (data: QueryPayload, sendInput: string) => { setMessages((prev) => [ ...prev, { type: "retrieval", content: data.documents, context: data.context }, ]); addStatusMessage( "Received " + Object.entries(data.documents).length + " documents", "SUCCESS" ); if (data.documents.length > 0) { const firstDoc = data.documents[0]; setSelectedDocument(firstDoc.uuid); setSelectedDocumentScore( `${firstDoc.uuid}${firstDoc.score}${firstDoc.chunks.length}` ); setSelectedChunkScore(firstDoc.chunks); if (data.context) { streamResponses(sendInput, data.context); setFetchingStatus("RESPONSE"); } } else { handleErrorResponse("We couldn't find any chunks to your query"); } }; const streamResponses = (query?: string, context?: string) => { if (socket?.readyState === WebSocket.OPEN) { const filteredMessages = messages .slice(1) // Skip the first message .filter((msg) => msg.type === "user" || msg.type === "system") .map((msg) => ({ type: msg.type, content: msg.content, })); const data = JSON.stringify({ query: query, context: context, conversation: filteredMessages, rag_config: RAGConfig, }); socket.send(data); } else { console.error("WebSocket is not open. 
ReadyState:", socket?.readyState); } }; const handleCompositionStart = () => { setIsComposing(true); }; const handleCompositionEnd = () => { setIsComposing(false); }; const handleKeyDown = (e: any) => { if (e.key === "Enter" && !e.shiftKey && !isComposing) { e.preventDefault(); // Prevent new line sendUserMessage(); // Submit form } }; const retrieveDatacount = async () => { try { const data: DataCountPayload | null = await fetchDatacount( currentEmbedding, documentFilter, credentials ); const labels: LabelsResponse | null = await fetchLabels(credentials); if (data) { setCurrentDatacount(data.datacount); } if (labels) { setLabels(labels.labels); } } catch (error) { console.error("Failed to fetch from API:", error); addStatusMessage("Failed to fetch datacount: " + error, "ERROR"); } }; const reconnectToVerba = () => { setReconnect((prevState) => !prevState); }; const onSaveConfig = async () => { addStatusMessage("Saved Config", "SUCCESS"); await updateRAGConfig(RAGConfig, credentials); }; const onResetConfig = async () => { addStatusMessage("Reset Config", "WARNING"); retrieveRAGConfig(); }; const handleSuggestions = async () => { if ( RAGConfig && RAGConfig["Retriever"].components[RAGConfig["Retriever"].selected].config[ "Suggestion" ].value ) { const suggestions = await fetchSuggestions(userInput, 3, credentials); if (suggestions) { setCurrentSuggestions(suggestions.suggestions); } } }; return (
{/* Header */}
{ setSelectedSetting("Chat"); }} selected={selectedSetting === "Chat"} disabled={false} selected_color="bg-secondary-verba" /> {production != "Demo" && ( { setSelectedSetting("Config"); }} selected={selectedSetting === "Config"} disabled={false} selected_color="bg-secondary-verba" /> )}
{/* New fixed tab */} {selectedSetting == "Chat" && (
{(filterLabels.length > 0 || documentFilter.length > 0) && ( { setFilterLabels([]); setDocumentFilter([]); }} title="Clear" className="btn-sm max-w-min" icon_size={12} text_class_name="text-xs" Icon={MdCancel} selected={false} disabled={false} /> )}
{filterLabels.map((label, index) => ( { setFilterLabels(filterLabels.filter((l) => l !== label)); }} /> ))} {documentFilter.map((filter, index) => ( { setDocumentFilter( documentFilter.filter((f) => f.uuid !== filter.uuid) ); }} /> ))}
)}
{currentDatacount === 0 && } {currentDatacount === 0 && (

{`${currentDatacount} documents embedded by ${currentEmbedding}`}

)}
{messages.map((message, index) => (
))} {previewText && ( )} {isFetching.current && (

{fetchingStatus === "CHUNKS" && "Retrieving..."} {fetchingStatus === "RESPONSE" && "Generating..."}

)}
{selectedSetting === "Config" && ( )}
{socketOnline ? (