Repository: huggingface/chat-ui Branch: main Commit: 6859cbeaeee0 Files: 412 Total size: 1.1 MB Directory structure: gitextract_mfj9ft39/ ├── .devcontainer/ │ ├── Dockerfile │ └── devcontainer.json ├── .dockerignore ├── .env ├── .env.ci ├── .eslintignore ├── .eslintrc.cjs ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report--chat-ui-.md │ │ ├── config-support.md │ │ ├── feature-request--chat-ui-.md │ │ └── huggingchat.md │ ├── release.yml │ └── workflows/ │ ├── build-docs.yml │ ├── build-image.yml │ ├── build-pr-docs.yml │ ├── deploy-dev.yml │ ├── deploy-prod.yml │ ├── lint-and-test.yml │ ├── slugify.yaml │ ├── trufflehog.yml │ └── upload-pr-documentation.yml ├── .gitignore ├── .husky/ │ ├── lint-stage-config.js │ └── pre-commit ├── .npmrc ├── .prettierignore ├── .prettierrc ├── .vscode/ │ ├── launch.json │ └── settings.json ├── CLAUDE.md ├── Dockerfile ├── LICENSE ├── PRIVACY.md ├── README.md ├── chart/ │ ├── Chart.yaml │ ├── env/ │ │ ├── dev.yaml │ │ └── prod.yaml │ ├── templates/ │ │ ├── _helpers.tpl │ │ ├── config.yaml │ │ ├── deployment.yaml │ │ ├── hpa.yaml │ │ ├── infisical.yaml │ │ ├── ingress-internal.yaml │ │ ├── ingress.yaml │ │ ├── network-policy.yaml │ │ ├── service-account.yaml │ │ ├── service-monitor.yaml │ │ └── service.yaml │ └── values.yaml ├── docker-compose.yml ├── docs/ │ └── source/ │ ├── _toctree.yml │ ├── configuration/ │ │ ├── common-issues.md │ │ ├── llm-router.md │ │ ├── mcp-tools.md │ │ ├── metrics.md │ │ ├── open-id.md │ │ ├── overview.md │ │ └── theming.md │ ├── developing/ │ │ └── architecture.md │ ├── index.md │ └── installation/ │ ├── docker.md │ ├── helm.md │ └── local.md ├── entrypoint.sh ├── models/ │ └── add-your-models-here.txt ├── package.json ├── postcss.config.js ├── scripts/ │ ├── config.ts │ ├── populate.ts │ ├── samples.txt │ ├── setups/ │ │ ├── vitest-setup-client.ts │ │ └── vitest-setup-server.ts │ └── updateLocalEnv.ts ├── server.log ├── src/ │ ├── ambient.d.ts │ ├── app.d.ts │ ├── app.html │ ├── hooks.server.ts │ ├── 
hooks.ts │ ├── lib/ │ │ ├── APIClient.ts │ │ ├── actions/ │ │ │ ├── clickOutside.ts │ │ │ └── snapScrollToBottom.ts │ │ ├── buildPrompt.ts │ │ ├── components/ │ │ │ ├── AnnouncementBanner.svelte │ │ │ ├── BackgroundGenerationPoller.svelte │ │ │ ├── CodeBlock.svelte │ │ │ ├── CopyToClipBoardBtn.svelte │ │ │ ├── DeleteConversationModal.svelte │ │ │ ├── EditConversationModal.svelte │ │ │ ├── ExpandNavigation.svelte │ │ │ ├── HoverTooltip.svelte │ │ │ ├── HtmlPreviewModal.svelte │ │ │ ├── InfiniteScroll.svelte │ │ │ ├── MobileNav.svelte │ │ │ ├── Modal.svelte │ │ │ ├── ModelCardMetadata.svelte │ │ │ ├── NavConversationItem.svelte │ │ │ ├── NavMenu.svelte │ │ │ ├── Pagination.svelte │ │ │ ├── PaginationArrow.svelte │ │ │ ├── Portal.svelte │ │ │ ├── RetryBtn.svelte │ │ │ ├── ScrollToBottomBtn.svelte │ │ │ ├── ScrollToPreviousBtn.svelte │ │ │ ├── ShareConversationModal.svelte │ │ │ ├── StopGeneratingBtn.svelte │ │ │ ├── SubscribeModal.svelte │ │ │ ├── Switch.svelte │ │ │ ├── SystemPromptModal.svelte │ │ │ ├── Toast.svelte │ │ │ ├── Tooltip.svelte │ │ │ ├── WelcomeModal.svelte │ │ │ ├── chat/ │ │ │ │ ├── Alternatives.svelte │ │ │ │ ├── BlockWrapper.svelte │ │ │ │ ├── ChatInput.svelte │ │ │ │ ├── ChatIntroduction.svelte │ │ │ │ ├── ChatMessage.svelte │ │ │ │ ├── ChatWindow.svelte │ │ │ │ ├── FileDropzone.svelte │ │ │ │ ├── ImageLightbox.svelte │ │ │ │ ├── MarkdownBlock.svelte │ │ │ │ ├── MarkdownRenderer.svelte │ │ │ │ ├── MarkdownRenderer.svelte.test.ts │ │ │ │ ├── MessageAvatar.svelte │ │ │ │ ├── ModelSwitch.svelte │ │ │ │ ├── OpenReasoningResults.svelte │ │ │ │ ├── ToolUpdate.svelte │ │ │ │ ├── UploadedFile.svelte │ │ │ │ ├── UrlFetchModal.svelte │ │ │ │ └── VoiceRecorder.svelte │ │ │ ├── icons/ │ │ │ │ ├── IconBurger.svelte │ │ │ │ ├── IconCheap.svelte │ │ │ │ ├── IconChevron.svelte │ │ │ │ ├── IconDazzled.svelte │ │ │ │ ├── IconFast.svelte │ │ │ │ ├── IconLoading.svelte │ │ │ │ ├── IconMCP.svelte │ │ │ │ ├── IconMoon.svelte │ │ │ │ ├── IconNew.svelte │ │ │ │ ├── 
IconOmni.svelte │ │ │ │ ├── IconPaperclip.svelte │ │ │ │ ├── IconPro.svelte │ │ │ │ ├── IconShare.svelte │ │ │ │ ├── IconSun.svelte │ │ │ │ ├── Logo.svelte │ │ │ │ └── LogoHuggingFaceBorderless.svelte │ │ │ ├── mcp/ │ │ │ │ ├── AddServerForm.svelte │ │ │ │ ├── MCPServerManager.svelte │ │ │ │ └── ServerCard.svelte │ │ │ ├── players/ │ │ │ │ └── AudioPlayer.svelte │ │ │ └── voice/ │ │ │ └── AudioWaveform.svelte │ │ ├── constants/ │ │ │ ├── mcpExamples.ts │ │ │ ├── mime.ts │ │ │ ├── pagination.ts │ │ │ ├── publicSepToken.ts │ │ │ └── routerExamples.ts │ │ ├── createShareLink.ts │ │ ├── jobs/ │ │ │ └── refresh-conversation-stats.ts │ │ ├── migrations/ │ │ │ ├── lock.ts │ │ │ ├── migrations.spec.ts │ │ │ ├── migrations.ts │ │ │ └── routines/ │ │ │ ├── 01-update-search-assistants.ts │ │ │ ├── 02-update-assistants-models.ts │ │ │ ├── 04-update-message-updates.ts │ │ │ ├── 05-update-message-files.ts │ │ │ ├── 06-trim-message-updates.ts │ │ │ ├── 08-update-featured-to-review.ts │ │ │ ├── 09-delete-empty-conversations.spec.ts │ │ │ ├── 09-delete-empty-conversations.ts │ │ │ ├── 10-update-reports-assistantid.ts │ │ │ └── index.ts │ │ ├── server/ │ │ │ ├── __tests__/ │ │ │ │ └── conversation-stop-generating.spec.ts │ │ │ ├── abortRegistry.ts │ │ │ ├── abortedGenerations.ts │ │ │ ├── adminToken.ts │ │ │ ├── api/ │ │ │ │ ├── __tests__/ │ │ │ │ │ ├── conversations-id.spec.ts │ │ │ │ │ ├── conversations-message.spec.ts │ │ │ │ │ ├── conversations.spec.ts │ │ │ │ │ ├── misc.spec.ts │ │ │ │ │ ├── testHelpers.ts │ │ │ │ │ ├── user-reports.spec.ts │ │ │ │ │ └── user.spec.ts │ │ │ │ ├── types.ts │ │ │ │ └── utils/ │ │ │ │ ├── requireAuth.ts │ │ │ │ ├── resolveConversation.ts │ │ │ │ ├── resolveModel.ts │ │ │ │ └── superjsonResponse.ts │ │ │ ├── apiToken.ts │ │ │ ├── auth.ts │ │ │ ├── config.ts │ │ │ ├── conversation.ts │ │ │ ├── database.ts │ │ │ ├── endpoints/ │ │ │ │ ├── document.ts │ │ │ │ ├── endpoints.ts │ │ │ │ ├── images.ts │ │ │ │ ├── openai/ │ │ │ │ │ ├── endpointOai.ts │ │ │ 
│ │ ├── openAIChatToTextGenerationStream.ts │ │ │ │ │ └── openAICompletionToTextGenerationStream.ts │ │ │ │ └── preprocessMessages.ts │ │ │ ├── exitHandler.ts │ │ │ ├── files/ │ │ │ │ ├── downloadFile.ts │ │ │ │ └── uploadFile.ts │ │ │ ├── findRepoRoot.ts │ │ │ ├── generateFromDefaultEndpoint.ts │ │ │ ├── hooks/ │ │ │ │ ├── error.ts │ │ │ │ ├── fetch.ts │ │ │ │ ├── handle.ts │ │ │ │ └── init.ts │ │ │ ├── isURLLocal.spec.ts │ │ │ ├── isURLLocal.ts │ │ │ ├── logger.ts │ │ │ ├── mcp/ │ │ │ │ ├── clientPool.ts │ │ │ │ ├── hf.ts │ │ │ │ ├── httpClient.ts │ │ │ │ ├── registry.ts │ │ │ │ └── tools.ts │ │ │ ├── metrics.ts │ │ │ ├── models.ts │ │ │ ├── requestContext.ts │ │ │ ├── router/ │ │ │ │ ├── arch.ts │ │ │ │ ├── endpoint.ts │ │ │ │ ├── multimodal.ts │ │ │ │ ├── policy.ts │ │ │ │ ├── toolsRoute.ts │ │ │ │ └── types.ts │ │ │ ├── sendSlack.ts │ │ │ ├── textGeneration/ │ │ │ │ ├── generate.ts │ │ │ │ ├── index.ts │ │ │ │ ├── mcp/ │ │ │ │ │ ├── fileRefs.ts │ │ │ │ │ ├── routerResolution.ts │ │ │ │ │ ├── runMcpFlow.ts │ │ │ │ │ └── toolInvocation.ts │ │ │ │ ├── reasoning.ts │ │ │ │ ├── title.ts │ │ │ │ ├── types.ts │ │ │ │ └── utils/ │ │ │ │ ├── prepareFiles.ts │ │ │ │ ├── routing.ts │ │ │ │ └── toolPrompt.ts │ │ │ ├── urlSafety.ts │ │ │ └── usageLimits.ts │ │ ├── stores/ │ │ │ ├── backgroundGenerations.svelte.ts │ │ │ ├── backgroundGenerations.ts │ │ │ ├── errors.ts │ │ │ ├── isAborted.ts │ │ │ ├── isPro.ts │ │ │ ├── loading.ts │ │ │ ├── mcpServers.ts │ │ │ ├── pendingChatInput.ts │ │ │ ├── pendingMessage.ts │ │ │ ├── settings.ts │ │ │ ├── shareModal.ts │ │ │ └── titleUpdate.ts │ │ ├── switchTheme.ts │ │ ├── types/ │ │ │ ├── AbortedGeneration.ts │ │ │ ├── Assistant.ts │ │ │ ├── AssistantStats.ts │ │ │ ├── ConfigKey.ts │ │ │ ├── ConvSidebar.ts │ │ │ ├── Conversation.ts │ │ │ ├── ConversationStats.ts │ │ │ ├── Message.ts │ │ │ ├── MessageEvent.ts │ │ │ ├── MessageUpdate.ts │ │ │ ├── MigrationResult.ts │ │ │ ├── Model.ts │ │ │ ├── Report.ts │ │ │ ├── Review.ts │ │ │ ├── 
Semaphore.ts │ │ │ ├── Session.ts │ │ │ ├── Settings.ts │ │ │ ├── SharedConversation.ts │ │ │ ├── Template.ts │ │ │ ├── Timestamps.ts │ │ │ ├── TokenCache.ts │ │ │ ├── Tool.ts │ │ │ ├── UrlDependency.ts │ │ │ └── User.ts │ │ ├── utils/ │ │ │ ├── PublicConfig.svelte.ts │ │ │ ├── auth.ts │ │ │ ├── chunk.ts │ │ │ ├── cookiesAreEnabled.ts │ │ │ ├── debounce.ts │ │ │ ├── deepestChild.ts │ │ │ ├── favicon.ts │ │ │ ├── fetchJSON.ts │ │ │ ├── file2base64.ts │ │ │ ├── formatUserCount.ts │ │ │ ├── generationState.spec.ts │ │ │ ├── generationState.ts │ │ │ ├── getHref.ts │ │ │ ├── getReturnFromGenerator.ts │ │ │ ├── haptics.ts │ │ │ ├── hashConv.ts │ │ │ ├── hf.ts │ │ │ ├── isDesktop.ts │ │ │ ├── isUrl.ts │ │ │ ├── isVirtualKeyboard.ts │ │ │ ├── loadAttachmentsFromUrls.ts │ │ │ ├── marked.spec.ts │ │ │ ├── marked.ts │ │ │ ├── mcpValidation.ts │ │ │ ├── mergeAsyncGenerators.ts │ │ │ ├── messageUpdates.spec.ts │ │ │ ├── messageUpdates.ts │ │ │ ├── mime.ts │ │ │ ├── models.ts │ │ │ ├── parseBlocks.ts │ │ │ ├── parseIncompleteMarkdown.ts │ │ │ ├── parseStringToList.ts │ │ │ ├── randomUuid.ts │ │ │ ├── searchTokens.ts │ │ │ ├── sha256.ts │ │ │ ├── stringifyError.ts │ │ │ ├── sum.ts │ │ │ ├── template.spec.ts │ │ │ ├── template.ts │ │ │ ├── timeout.ts │ │ │ ├── toolProgress.spec.ts │ │ │ ├── toolProgress.ts │ │ │ ├── tree/ │ │ │ │ ├── addChildren.spec.ts │ │ │ │ ├── addChildren.ts │ │ │ │ ├── addSibling.spec.ts │ │ │ │ ├── addSibling.ts │ │ │ │ ├── buildSubtree.spec.ts │ │ │ │ ├── buildSubtree.ts │ │ │ │ ├── convertLegacyConversation.spec.ts │ │ │ │ ├── convertLegacyConversation.ts │ │ │ │ ├── isMessageId.spec.ts │ │ │ │ ├── isMessageId.ts │ │ │ │ ├── tree.d.ts │ │ │ │ └── treeHelpers.spec.ts │ │ │ ├── updates.ts │ │ │ └── urlParams.ts │ │ └── workers/ │ │ └── markdownWorker.ts │ ├── routes/ │ │ ├── +error.svelte │ │ ├── +layout.svelte │ │ ├── +layout.ts │ │ ├── +page.svelte │ │ ├── .well-known/ │ │ │ └── oauth-cimd/ │ │ │ └── +server.ts │ │ ├── __debug/ │ │ │ └── openai/ │ │ │ └── 
+server.ts │ │ ├── admin/ │ │ │ ├── export/ │ │ │ │ └── +server.ts │ │ │ └── stats/ │ │ │ └── compute/ │ │ │ └── +server.ts │ │ ├── api/ │ │ │ ├── conversation/ │ │ │ │ └── [id]/ │ │ │ │ ├── +server.ts │ │ │ │ └── message/ │ │ │ │ └── [messageId]/ │ │ │ │ └── +server.ts │ │ │ ├── conversations/ │ │ │ │ └── +server.ts │ │ │ ├── fetch-url/ │ │ │ │ └── +server.ts │ │ │ ├── mcp/ │ │ │ │ ├── health/ │ │ │ │ │ └── +server.ts │ │ │ │ └── servers/ │ │ │ │ └── +server.ts │ │ │ ├── models/ │ │ │ │ └── +server.ts │ │ │ ├── transcribe/ │ │ │ │ └── +server.ts │ │ │ ├── user/ │ │ │ │ ├── +server.ts │ │ │ │ └── validate-token/ │ │ │ │ └── +server.ts │ │ │ └── v2/ │ │ │ ├── conversations/ │ │ │ │ ├── +server.ts │ │ │ │ ├── [id]/ │ │ │ │ │ ├── +server.ts │ │ │ │ │ └── message/ │ │ │ │ │ └── [messageId]/ │ │ │ │ │ └── +server.ts │ │ │ │ └── import-share/ │ │ │ │ └── +server.ts │ │ │ ├── debug/ │ │ │ │ ├── config/ │ │ │ │ │ └── +server.ts │ │ │ │ └── refresh/ │ │ │ │ └── +server.ts │ │ │ ├── export/ │ │ │ │ └── +server.ts │ │ │ ├── feature-flags/ │ │ │ │ └── +server.ts │ │ │ ├── models/ │ │ │ │ ├── +server.ts │ │ │ │ ├── [namespace]/ │ │ │ │ │ ├── +server.ts │ │ │ │ │ ├── [model]/ │ │ │ │ │ │ ├── +server.ts │ │ │ │ │ │ └── subscribe/ │ │ │ │ │ │ └── +server.ts │ │ │ │ │ └── subscribe/ │ │ │ │ │ └── +server.ts │ │ │ │ ├── old/ │ │ │ │ │ └── +server.ts │ │ │ │ └── refresh/ │ │ │ │ └── +server.ts │ │ │ ├── public-config/ │ │ │ │ └── +server.ts │ │ │ └── user/ │ │ │ ├── +server.ts │ │ │ ├── billing-orgs/ │ │ │ │ └── +server.ts │ │ │ ├── reports/ │ │ │ │ └── +server.ts │ │ │ └── settings/ │ │ │ └── +server.ts │ │ ├── conversation/ │ │ │ ├── +server.ts │ │ │ └── [id]/ │ │ │ ├── +page.svelte │ │ │ ├── +page.ts │ │ │ ├── +server.ts │ │ │ ├── message/ │ │ │ │ └── [messageId]/ │ │ │ │ └── prompt/ │ │ │ │ └── +server.ts │ │ │ ├── output/ │ │ │ │ └── [sha256]/ │ │ │ │ └── +server.ts │ │ │ ├── share/ │ │ │ │ └── +server.ts │ │ │ └── stop-generating/ │ │ │ └── +server.ts │ │ ├── healthcheck/ │ │ │ 
└── +server.ts │ │ ├── login/ │ │ │ ├── +server.ts │ │ │ └── callback/ │ │ │ ├── +server.ts │ │ │ ├── updateUser.spec.ts │ │ │ └── updateUser.ts │ │ ├── logout/ │ │ │ └── +server.ts │ │ ├── metrics/ │ │ │ └── +server.ts │ │ ├── models/ │ │ │ ├── +page.svelte │ │ │ └── [...model]/ │ │ │ ├── +page.svelte │ │ │ └── +page.ts │ │ ├── privacy/ │ │ │ └── +page.svelte │ │ ├── r/ │ │ │ └── [id]/ │ │ │ └── +page.ts │ │ └── settings/ │ │ ├── (nav)/ │ │ │ ├── +layout.svelte │ │ │ ├── +layout.ts │ │ │ ├── +page.svelte │ │ │ ├── +server.ts │ │ │ ├── [...model]/ │ │ │ │ ├── +page.svelte │ │ │ │ └── +page.ts │ │ │ └── application/ │ │ │ └── +page.svelte │ │ └── +layout.svelte │ └── styles/ │ ├── highlight-js.css │ └── main.css ├── static/ │ ├── chatui/ │ │ └── manifest.json │ ├── huggingchat/ │ │ ├── manifest.json │ │ └── routes.chat.json │ └── robots.txt ├── stub/ │ └── @reflink/ │ └── reflink/ │ ├── index.js │ └── package.json ├── svelte.config.js ├── tailwind.config.cjs ├── tsconfig.json └── vite.config.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/Dockerfile ================================================ FROM mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm # Install MongoDB tools (mongosh, mongorestore, mongodump) directly from MongoDB repository RUN curl -fsSL https://www.mongodb.org/static/pgp/server-8.0.asc | gpg --dearmor -o /usr/share/keyrings/mongodb-server-8.0.gpg && \ echo "deb [ signed-by=/usr/share/keyrings/mongodb-server-8.0.gpg ] http://repo.mongodb.org/apt/debian bookworm/mongodb-org/8.0 main" | tee /etc/apt/sources.list.d/mongodb-org-8.0.list && \ apt-get update && \ apt-get install -y mongodb-mongosh mongodb-database-tools vim && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* ================================================ FILE: .devcontainer/devcontainer.json 
================================================ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node { "name": "Node.js & TypeScript", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "build": { "dockerfile": "Dockerfile" }, "customizations": { "vscode": { "extensions": ["esbenp.prettier-vscode", "dbaeumer.vscode-eslint", "svelte.svelte-vscode"] } }, "features": { // Install docker in container "ghcr.io/devcontainers/features/docker-in-docker:2": { // Use proprietary docker engine. I get a timeout error when using the default moby engine and loading // microsoft's PGP keys "moby": false } } // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. // "postCreateCommand": "yarn install", // Configure tool-specific properties. // "customizations": {}, // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. // "remoteUser": "root" } ================================================ FILE: .dockerignore ================================================ Dockerfile .vscode/ .idea .gitignore LICENSE README.md node_modules/ .svelte-kit/ .env* !.env .env.local db models/** ================================================ FILE: .env ================================================ # Use .env.local to change these variables # DO NOT EDIT THIS FILE WITH SENSITIVE DATA ### Models ### # Models are sourced exclusively from an OpenAI-compatible base URL. # Example: https://router.huggingface.co/v1 OPENAI_BASE_URL=https://router.huggingface.co/v1 # Canonical auth token for any OpenAI-compatible provider OPENAI_API_KEY=#your provider API key (works for HF router, OpenAI, LM Studio, etc.). 
# When set to true, user token will be used for inference calls USE_USER_TOKEN=false # Automatically redirect to oauth login page if user is not logged in, when set to "true" AUTOMATIC_LOGIN=false ### MongoDB ### MONGODB_URL=#your mongodb URL here, use chat-ui-db image if you don't want to set this MONGODB_DB_NAME=chat-ui MONGODB_DIRECT_CONNECTION=false ## Public app configuration ## PUBLIC_APP_NAME=ChatUI # name used as title throughout the app PUBLIC_APP_ASSETS=chatui # used to find logos & favicons in static/$PUBLIC_APP_ASSETS PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone."# description used throughout the app PUBLIC_ORIGIN= PUBLIC_SHARE_PREFIX= PUBLIC_GOOGLE_ANALYTICS_ID= PUBLIC_PLAUSIBLE_SCRIPT_URL= PUBLIC_APPLE_APP_ID= COUPLE_SESSION_WITH_COOKIE_NAME= # when OPEN_ID is configured, users are required to login after the welcome modal OPENID_CLIENT_ID="" # You can set to "__CIMD__" for automatic oauth app creation when deployed, see https://datatracker.ietf.org/doc/draft-ietf-oauth-client-id-metadata-document/ OPENID_CLIENT_SECRET= OPENID_SCOPES="openid profile inference-api read-mcp read-billing" USE_USER_TOKEN= AUTOMATIC_LOGIN=# if true authentication is required on all routes ### Local Storage ### MONGO_STORAGE_PATH= # where is the db folder stored ## Models overrides MODELS= ## Task model # Optional: set to the model id/name from the `${OPENAI_BASE_URL}/models` list # to use for internal tasks (title summarization, etc). If not set, the current model will be used TASK_MODEL= # Arch router (OpenAI-compatible) endpoint base URL used for route selection # Example: https://api.openai.com/v1 or your hosted Arch endpoint LLM_ROUTER_ARCH_BASE_URL= ## LLM Router Configuration # Path to routes policy (JSON array). Required when the router is enabled; must point to a valid JSON file. 
LLM_ROUTER_ROUTES_PATH= # Model used at the Arch router endpoint for selection LLM_ROUTER_ARCH_MODEL= # Fallback behavior # Route to map "other" to (must exist in routes file) LLM_ROUTER_OTHER_ROUTE=casual_conversation # Model to call if the Arch selection fails entirely LLM_ROUTER_FALLBACK_MODEL= # Arch selection timeout in milliseconds (default 10000) LLM_ROUTER_ARCH_TIMEOUT_MS=10000 # Maximum length (in characters) for assistant messages sent to router for route selection (default 500) LLM_ROUTER_MAX_ASSISTANT_LENGTH=500 # Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400) LLM_ROUTER_MAX_PREV_USER_LENGTH=400 # Enable router multimodal handling (set to true to allow image inputs via router) LLM_ROUTER_ENABLE_MULTIMODAL= # Required when LLM_ROUTER_ENABLE_MULTIMODAL=true: id or name of the multimodal model to use for image requests LLM_ROUTER_MULTIMODAL_MODEL= # Enable router tool support (set to true to allow tool calling via router) LLM_ROUTER_ENABLE_TOOLS= # Required when tools are active: id or name of the model to use for MCP tool calls. LLM_ROUTER_TOOLS_MODEL= # Router UI overrides (client-visible) # Public display name for the router entry in the model list. Defaults to "Omni". PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni # Optional: public logo URL for the router entry. If unset, the UI shows a Carbon icon. PUBLIC_LLM_ROUTER_LOGO_URL= # Public alias id used for the virtual router model (Omni). Defaults to "omni". 
PUBLIC_LLM_ROUTER_ALIAS_ID=omni ### Transcription ### # Voice-to-text transcription using Whisper models # If set, enables the microphone button in the chat input # Example: openai/whisper-large-v3-turbo TRANSCRIPTION_MODEL= # Optional: Base URL for transcription API (defaults to HF inference) # Default: https://router.huggingface.co/hf-inference/models TRANSCRIPTION_BASE_URL= ### Authentication ### # Parameters to enable open id login OPENID_CONFIG= # if it's defined, only these emails will be allowed to use login ALLOWED_USER_EMAILS=[] # If it's defined, users with emails matching these domains will also be allowed to use login ALLOWED_USER_DOMAINS=[] # valid alternative redirect URLs for OAuth, used for HuggingChat apps ALTERNATIVE_REDIRECT_URLS=[] ### Cookies # name of the cookie used to store the session COOKIE_NAME=hf-chat # If the value of this cookie changes, the session is destroyed. Useful if chat-ui is deployed on a subpath # of your domain, and you want chat ui sessions to reset if the user's auth changes COUPLE_SESSION_WITH_COOKIE_NAME= # specify secure behaviour for cookies COOKIE_SAMESITE=# can be "lax", "strict", "none" or left empty COOKIE_SECURE=# set to true to only allow cookies over https TRUSTED_EMAIL_HEADER=# header to use to get the user email, only use if you know what you are doing ### Admin stuff ### ADMIN_CLI_LOGIN=true # set to false to disable the CLI login ADMIN_TOKEN=#We recommend leaving this empty, you can get the token from the terminal. ### Feature Flags ### LLM_SUMMARIZATION=true # generate conversation titles with LLMs ALLOW_IFRAME=true # Allow the app to be embedded in an iframe # Base servers list (JSON array). 
Example: MCP_SERVERS=[{"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp"}, {"name": "Hugging Face", "url": "https://hf.co/mcp"}] MCP_SERVERS= # When true, forward the logged-in user's Hugging Face access token MCP_FORWARD_HF_USER_TOKEN= # Exa API key (injected at runtime into mcp.exa.ai URLs as ?exaApiKey=) EXA_API_KEY= # Timeout in milliseconds for MCP tool calls (default: 120000 = 2 minutes) MCP_TOOL_TIMEOUT_MS= ENABLE_DATA_EXPORT=true ### Rate limits ### # See `src/lib/server/usageLimits.ts` # { # conversations: number, # how many conversations # messages: number, # how many messages in a conversation # assistants: number, # how many assistants # messageLength: number, # how long can a message be before we cut it off # messagesPerMinute: number, # how many messages per minute # tools: number # how many tools # } USAGE_LIMITS={} ### HuggingFace specific ### ## Feature flag & admin settings # Used for setting early access & admin flags to users HF_ORG_ADMIN= HF_ORG_EARLY_ACCESS= WEBHOOK_URL_REPORT_ASSISTANT=#provide slack webhook url to get notified for reports/feature requests ### Metrics ### METRICS_ENABLED=false METRICS_PORT=5565 LOG_LEVEL=info ### Parquet export ### # Not in use anymore but useful to export conversations to a parquet file as a HuggingFace dataset PARQUET_EXPORT_DATASET= PARQUET_EXPORT_HF_TOKEN= ADMIN_API_SECRET=# secret for admin API calls, like computing usage stats or exporting parquet data ### Config ### ENABLE_CONFIG_MANAGER=true ### Docker build variables ### # These values cannot be updated at runtime # They need to be passed when building the docker image # See https://github.com/huggingface/chat-ui/blob/main/.github/workflows/deploy-prod.yml#L44-L47 APP_BASE="" # base path of the app, e.g. /chat, left blank as default ### Body size limit for SvelteKit https://svelte.dev/docs/kit/adapter-node#Environment-variables-BODY_SIZE_LIMIT BODY_SIZE_LIMIT=15728640 PUBLIC_COMMIT_SHA= ### LEGACY parameters ALLOW_INSECURE_COOKIES=false # LEGACY!
Use COOKIE_SECURE and COOKIE_SAMESITE instead PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead RATE_LIMIT= # /!\ DEPRECATED definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead OPENID_NAME_CLAIM="name" # Change to "username" for some providers that do not provide name OPENID_PROVIDER_URL=https://huggingface.co # for Google, use https://accounts.google.com OPENID_TOLERANCE= OPENID_RESOURCE= EXPOSE_API=# deprecated, API is now always exposed ================================================ FILE: .env.ci ================================================ MONGODB_URL=mongodb://localhost:27017/ ================================================ FILE: .eslintignore ================================================ .DS_Store node_modules /build /.svelte-kit /package .env .env.* !.env.example # Ignore files for PNPM, NPM and YARN pnpm-lock.yaml package-lock.json yarn.lock ================================================ FILE: .eslintrc.cjs ================================================ module.exports = { root: true, parser: "@typescript-eslint/parser", extends: [ "eslint:recommended", "plugin:@typescript-eslint/recommended", "plugin:svelte/recommended", "prettier", ], plugins: ["@typescript-eslint"], ignorePatterns: ["*.cjs"], overrides: [ { files: ["*.svelte"], parser: "svelte-eslint-parser", parserOptions: { parser: "@typescript-eslint/parser", }, }, ], parserOptions: { sourceType: "module", ecmaVersion: 2020, extraFileExtensions: [".svelte"], }, rules: { "no-empty": "off", "require-yield": "off", "@typescript-eslint/no-explicit-any": "error", "@typescript-eslint/no-non-null-assertion": "error", "@typescript-eslint/no-unused-vars": [ // prevent variables with a _ prefix from being marked as unused "error", { argsIgnorePattern: "^_", }, ], "object-shorthand": ["error", "always"], }, env: { browser: true, es2017: true, node: true, }, }; ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report--chat-ui-.md 
================================================ --- name: Bug Report (chat-ui) about: Use this for confirmed issues with chat-ui title: "" labels: bug assignees: "" --- ## Bug description ## Steps to reproduce ## Screenshots ## Context ### Logs ``` // logs here if relevant ``` ### Specs - **OS**: - **Browser**: - **chat-ui commit**: ### Config ## Notes ================================================ FILE: .github/ISSUE_TEMPLATE/config-support.md ================================================ --- name: Config Support about: Help with setting up chat-ui locally title: "" labels: support assignees: "" --- **Please use the discussions on GitHub** for getting help with setting things up instead of opening an issue: https://github.com/huggingface/chat-ui/discussions ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request--chat-ui-.md ================================================ --- name: Feature Request (chat-ui) about: Suggest new features to be added to chat-ui title: "" labels: enhancement assignees: "" --- ## Describe your feature request ## Screenshots (if relevant) ## Implementation idea ================================================ FILE: .github/ISSUE_TEMPLATE/huggingchat.md ================================================ --- name: HuggingChat about: Requests & reporting outages on HuggingChat, the hosted version of chat-ui. title: "" labels: huggingchat assignees: "" --- **Do not use GitHub issues** for requesting models on HuggingChat or reporting issues with HuggingChat being down/overloaded. 
**Use the discussions page on the hub instead:** https://huggingface.co/spaces/huggingchat/chat-ui/discussions ================================================ FILE: .github/release.yml ================================================ changelog: exclude: labels: - huggingchat - CI/CD - documentation categories: - title: Features labels: - enhancement - title: Bugfixes labels: - bug - title: Other changes labels: - "*" ================================================ FILE: .github/workflows/build-docs.yml ================================================ name: Build documentation on: push: branches: - main - v*-release jobs: build: uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main with: commit_sha: ${{ github.sha }} package: chat-ui additional_args: --not_python_module secrets: token: ${{ secrets.HUGGINGFACE_PUSH }} hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} ================================================ FILE: .github/workflows/build-image.yml ================================================ name: Build and Publish Image permissions: packages: write on: push: branches: - "main" pull_request: branches: - "*" paths: - "Dockerfile" - "entrypoint.sh" workflow_dispatch: release: types: [published, edited] jobs: build-and-publish-image-with-db: runs-on: group: aws-general-8-plus steps: - name: Checkout uses: actions/checkout@v4 - name: Extract package version id: package-version run: | VERSION=$(jq -r .version package.json) echo "VERSION=$VERSION" >> $GITHUB_OUTPUT MAJOR=$(echo $VERSION | cut -d '.' -f1) echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' 
-f2) echo "MINOR=$MINOR" >> $GITHUB_OUTPUT - name: Docker metadata id: meta uses: docker/metadata-action@v5 with: images: | ghcr.io/huggingface/chat-ui-db tags: | type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}} type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}} type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}} type=raw,value=latest,enable={{is_default_branch}} type=sha,enable={{is_default_branch}} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to GitHub Container Registry if: github.event_name != 'pull_request' uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.5.0 - name: Build and Publish Docker Image with DB uses: docker/build-push-action@v5 with: context: . file: Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha cache-to: type=gha,mode=max build-args: | INCLUDE_DB=true PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }} build-and-publish-image-nodb: runs-on: group: aws-general-8-plus steps: - name: Checkout uses: actions/checkout@v4 - name: Extract package version id: package-version run: | VERSION=$(jq -r .version package.json) echo "VERSION=$VERSION" >> $GITHUB_OUTPUT MAJOR=$(echo $VERSION | cut -d '.' -f1) echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' 
-f2) echo "MINOR=$MINOR" >> $GITHUB_OUTPUT - name: Docker metadata id: meta uses: docker/metadata-action@v5 with: images: | ghcr.io/huggingface/chat-ui tags: | type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}} type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}} type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}} type=raw,value=latest,enable={{is_default_branch}} type=sha,enable={{is_default_branch}} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to GitHub Container Registry if: github.event_name != 'pull_request' uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.5.0 - name: Build and Publish Docker Image without DB uses: docker/build-push-action@v5 with: context: . 
file: Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha cache-to: type=gha,mode=max build-args: | INCLUDE_DB=false PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }} ================================================ FILE: .github/workflows/build-pr-docs.yml ================================================ name: Build PR Documentation on: pull_request: paths: - "docs/source/**" - ".github/workflows/build-pr-docs.yml" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: build: uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main with: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: chat-ui additional_args: --not_python_module ================================================ FILE: .github/workflows/deploy-dev.yml ================================================ name: Deploy to ephemeral on: pull_request: types: [opened, reopened, synchronize, labeled, unlabeled] jobs: branch-slug: uses: ./.github/workflows/slugify.yaml with: value: ${{ github.head_ref }} deploy-dev: if: contains(github.event.pull_request.labels.*.name, 'preview') runs-on: ubuntu-latest needs: branch-slug environment: name: dev url: https://${{ needs.branch-slug.outputs.slug }}.chat-dev.huggingface.tech/chat/ steps: - name: Checkout uses: actions/checkout@v4 - name: Login to Registry uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.5.0 - name: Set GITHUB_SHA_SHORT from PR if: env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT != null run: echo "GITHUB_SHA_SHORT=${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT }}" >> $GITHUB_ENV - name: Docker metadata id: meta uses: 
docker/metadata-action@v5 with: images: | huggingface/chat-ui tags: | type=raw,value=dev-${{ env.GITHUB_SHA_SHORT }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and Publish HuggingChat image uses: docker/build-push-action@v5 with: context: . file: Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64 cache-to: type=gha,mode=max,scope=amd64 cache-from: type=gha,scope=amd64 provenance: false build-args: | INCLUDE_DB=false APP_BASE=/chat PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }} ================================================ FILE: .github/workflows/deploy-prod.yml ================================================ name: Deploy to k8s on: # run this workflow manually from the Actions tab workflow_dispatch: jobs: build-and-publish-huggingchat-image: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Login to Registry uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - name: Docker metadata id: meta uses: docker/metadata-action@v5 with: images: | huggingface/chat-ui tags: | type=raw,value=latest,enable={{is_default_branch}} type=sha,enable=true,prefix=sha-,format=short,sha-len=8 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.5.0 - name: Build and Publish HuggingChat image uses: docker/build-push-action@v5 with: context: . 
file: Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64 cache-to: type=gha,mode=max,scope=amd64 cache-from: type=gha,scope=amd64 provenance: false build-args: | INCLUDE_DB=false APP_BASE=/chat PUBLIC_COMMIT_SHA=${{ env.GITHUB_SHA_SHORT }} deploy: name: Deploy on prod runs-on: ubuntu-latest needs: ["build-and-publish-huggingchat-image"] steps: - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4.5.0 - name: Gen values run: | VALUES=$(cat <<-END image: tag: "sha-${{ env.GITHUB_SHA_SHORT }}" END ) echo "VALUES=$(echo "$VALUES" | yq -o=json | jq tostring)" >> $GITHUB_ENV - name: Deploy on infra-deployments uses: aurelien-baudet/workflow-dispatch@v2 with: workflow: Update application single value repo: huggingface/infra-deployments wait-for-completion: true wait-for-completion-interval: 10s display-workflow-run-url-interval: 10s ref: refs/heads/main token: ${{ secrets.GIT_TOKEN_INFRA_DEPLOYMENT }} inputs: '{"path": "hub/chat-ui/chat-ui.yaml", "value": ${{ env.VALUES }}, "url": "${{ github.event.head_commit.url }}"}' ================================================ FILE: .github/workflows/lint-and-test.yml ================================================ name: Lint and test on: pull_request: push: branches: - main jobs: lint: runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v3 - uses: actions/setup-node@v3 with: node-version: "20" cache: "npm" - run: | npm ci - name: "Checking lint/format errors" run: | npm run lint - name: "Checking type errors" run: | npm run check test: runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v3 - uses: actions/setup-node@v3 with: node-version: "20" cache: "npm" - run: | npm ci npx playwright install - name: "Tests" run: | npm run test build-check: runs-on: group: aws-general-8-plus timeout-minutes: 10 steps: - uses: actions/checkout@v3 - name: Build 
Docker image run: | docker build \ --build-arg INCLUDE_DB=true \ -t chat-ui-test:latest . - name: Run Docker container run: | export DOTENV_LOCAL=$(<.env.ci) docker run -d --rm --network=host \ --name chat-ui-test \ -e DOTENV_LOCAL="$DOTENV_LOCAL" \ chat-ui-test:latest - name: Wait for server to start run: | for i in {1..10}; do if curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/ | grep -q "200"; then echo "Server is up" exit 0 fi echo "Waiting for server..." sleep 2 done echo "Server did not start in time" docker logs chat-ui-test exit 1 - name: Stop Docker container if: always() run: | docker stop chat-ui-test || true ================================================ FILE: .github/workflows/slugify.yaml ================================================ name: Generate Branch Slug on: workflow_call: inputs: value: description: "Value to slugify" required: true type: string outputs: slug: description: "Slugified value" value: ${{ jobs.generate-slug.outputs.slug }} jobs: generate-slug: runs-on: ubuntu-latest outputs: slug: ${{ steps.slugify.outputs.slug }} steps: - name: Setup Go uses: actions/setup-go@v5 with: go-version: "1.21" - name: Generate slug id: slugify run: | # Create working directory mkdir -p $HOME/slugify cd $HOME/slugify # Create Go script cat > main.go << 'EOF' package main import ( "fmt" "os" "github.com/gosimple/slug" ) func main() { if len(os.Args) < 2 { fmt.Println("Usage: slugify ") os.Exit(1) } text := os.Args[1] slugged := slug.Make(text) fmt.Println(slugged) } EOF # Initialize module and install dependency go mod init slugify go mod tidy go get github.com/gosimple/slug # Build go build -o slugify main.go # Generate slug VALUE="${{ inputs.value }}" echo "Input value: $VALUE" SLUG=$(./slugify "$VALUE") echo "Generated slug: $SLUG" # Export echo "slug=$SLUG" >> $GITHUB_OUTPUT ================================================ FILE: .github/workflows/trufflehog.yml ================================================ on: push: name: Secret 
Leaks jobs: trufflehog: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - name: Secret Scanning uses: trufflesecurity/trufflehog@main with: extra_args: --results=verified,unknown ================================================ FILE: .github/workflows/upload-pr-documentation.yml ================================================ name: Upload PR Documentation on: workflow_run: workflows: ["Build PR Documentation"] types: - completed jobs: build: uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main with: package_name: chat-ui secrets: hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} ================================================ FILE: .gitignore ================================================ .DS_Store node_modules /build /.svelte-kit /package .env .env.* vite.config.js.timestamp-* vite.config.ts.timestamp-* SECRET_CONFIG .idea !.env.ci !.env gcp-*.json db models/* !models/add-your-models-here.txt .claude/* !.claude/skills/ ================================================ FILE: .husky/lint-stage-config.js ================================================ export default { "*.{js,jsx,ts,tsx}": ["prettier --write", "eslint --fix", "eslint"], "*.json": ["prettier --write"], }; ================================================ FILE: .husky/pre-commit ================================================ set -e npx lint-staged --config ./.husky/lint-stage-config.js ================================================ FILE: .npmrc ================================================ engine-strict=true ================================================ FILE: .prettierignore ================================================ .DS_Store node_modules /build /.svelte-kit /package /chart .env .env.* !.env.example # Ignore files for PNPM, NPM and YARN pnpm-lock.yaml package-lock.json yarn.lock ================================================ FILE: .prettierrc 
================================================ { "useTabs": true, "trailingComma": "es5", "printWidth": 100, "plugins": ["prettier-plugin-svelte", "prettier-plugin-tailwindcss"], "overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }] } ================================================ FILE: .vscode/launch.json ================================================ { "version": "0.2.0", "configurations": [ { "command": "npm run dev", "name": "Run development server", "request": "launch", "type": "node-terminal" } ] } ================================================ FILE: .vscode/settings.json ================================================ { "editor.formatOnSave": true, "editor.defaultFormatter": "esbenp.prettier-vscode", "editor.codeActionsOnSave": { "source.fixAll": "explicit" }, "eslint.validate": ["javascript", "svelte"], "[svelte]": { "editor.defaultFormatter": "esbenp.prettier-vscode" }, "[typescript]": { "editor.defaultFormatter": "esbenp.prettier-vscode" } } ================================================ FILE: CLAUDE.md ================================================ # CLAUDE.md This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. ## Overview Chat UI is a SvelteKit application that provides a chat interface for LLMs. It powers HuggingChat (hf.co/chat). The app speaks exclusively to OpenAI-compatible APIs via `OPENAI_BASE_URL`. 
## Commands ```bash npm run dev # Start dev server on localhost:5173 npm run build # Production build npm run preview # Preview production build npm run check # TypeScript validation (svelte-kit sync + svelte-check) npm run lint # Check formatting (Prettier) and linting (ESLint) npm run format # Auto-format with Prettier npm run test # Run all tests (Vitest) ``` ### Running a Single Test ```bash npx vitest run path/to/file.spec.ts # Run specific test file npx vitest run -t "test name" # Run test by name npx vitest --watch path/to/file.spec.ts # Watch mode for single file ``` ### Test Environments Tests are split into three workspaces (configured in vite.config.ts): - **Client tests** (`*.svelte.test.ts`): Browser environment with Playwright - **SSR tests** (`*.ssr.test.ts`): Node environment for server-side rendering - **Server tests** (`*.test.ts`, `*.spec.ts`): Node environment for utilities ## Architecture ### Stack - **SvelteKit 2** with Svelte 5 (uses runes: `$state`, `$effect`, `$bindable`) - **MongoDB** for persistence (auto-fallback to in-memory with MongoMemoryServer when `MONGODB_URL` not set) - **TailwindCSS** for styling ### Key Directories ``` src/ ├── lib/ │ ├── components/ # Svelte components (chat/, mcp/, voice/, icons/) │ ├── server/ │ │ ├── api/utils/ # Shared API helpers (auth, superjson, model/conversation resolvers) │ │ ├── textGeneration/ # LLM streaming pipeline │ │ ├── mcp/ # Model Context Protocol integration │ │ ├── router/ # Smart model routing (Omni) │ │ ├── database.ts # MongoDB collections │ │ ├── models.ts # Model registry from OPENAI_BASE_URL/models │ │ └── auth.ts # OpenID Connect authentication │ ├── types/ # TypeScript interfaces (Conversation, Message, User, Model, etc.) │ ├── stores/ # Svelte stores for reactive state │ └── utils/ # Helpers (tree/, marked.ts, auth.ts, etc.) 
├── routes/ # SvelteKit file-based routing │ ├── conversation/[id]/ # Chat page + streaming endpoint │ ├── settings/ # User settings pages │ ├── api/ # Legacy v1 API endpoints (mcp, transcribe, fetch-url) │ ├── api/v2/ # REST API endpoints (+server.ts) │ └── r/[id]/ # Shared conversation view ``` ### Text Generation Flow 1. User sends message via `POST /conversation/[id]` 2. Server validates user, fetches conversation history 3. Builds message tree structure (see `src/lib/utils/tree/`) 4. Calls LLM endpoint via OpenAI client 5. Streams response back, stores in MongoDB ### Model Context Protocol (MCP) MCP servers are configured via `MCP_SERVERS` env var. When enabled, tools are exposed as OpenAI function calls. The router can auto-select tools-capable models when `LLM_ROUTER_ENABLE_TOOLS=true`. ### LLM Router (Omni) Smart routing via Arch-Router model. Configured with: - `LLM_ROUTER_ROUTES_PATH`: JSON file defining routes - `LLM_ROUTER_ARCH_BASE_URL`: Router endpoint - Shortcuts: multimodal routes bypass router if `LLM_ROUTER_ENABLE_MULTIMODAL=true` ### Database Collections - `conversations` - Chat sessions with nested messages - `users` - User accounts (OIDC-backed) - `sessions` - Session data - `sharedConversations` - Public share links - `settings` - User preferences ## Environment Setup Copy `.env` to `.env.local` and configure: ```env OPENAI_BASE_URL=https://router.huggingface.co/v1 OPENAI_API_KEY=hf_*** # MONGODB_URL is optional; omit for in-memory DB persisted to ./db ``` See `.env` for full list of variables including router config, MCP servers, auth, and feature flags. ## Code Conventions - TypeScript strict mode enabled - ESLint: no `any`, no non-null assertions - Prettier: tabs, 100 char width, Tailwind class sorting - Server vs client separation via SvelteKit conventions (`+page.server.ts` vs `+page.ts`) ## Feature Development Checklist When building new features, consider: 1. 
**HuggingChat vs self-hosted**: Wrap HuggingChat-specific features with `publicConfig.isHuggingChat` 2. **Settings persistence**: Add new fields to `src/lib/types/Settings.ts`, update API endpoint at `src/routes/api/v2/user/settings/+server.ts` 3. **Rich dropdowns**: Use `bits-ui` (Select, DropdownMenu) instead of native elements when you need icons/images in options 4. **Scrollbars**: Use `scrollbar-custom` class for styled scrollbars 5. **Icons**: Custom icons in `$lib/components/icons/`, use Carbon (`~icons/carbon/*`) or Lucide (`~icons/lucide/*`) for standard icons 6. **Provider avatars**: Use `PROVIDERS_HUB_ORGS` from `@huggingface/inference` for HF provider avatar URLs ================================================ FILE: Dockerfile ================================================ # syntax=docker/dockerfile:1 ARG INCLUDE_DB=false FROM node:24-slim AS base # install dotenv-cli RUN npm install -g dotenv-cli # switch to a user that works for spaces RUN userdel -r node RUN useradd -m -u 1000 user USER user ENV HOME=/home/user \ PATH=/home/user/.local/bin:$PATH WORKDIR /app # add a .env.local if the user doesn't bind a volume to it RUN touch /app/.env.local USER root RUN apt-get update RUN apt-get install -y libgomp1 libcurl4 curl dnsutils nano # ensure npm cache dir exists before adjusting ownership RUN mkdir -p /home/user/.npm && chown -R 1000:1000 /home/user/.npm USER user COPY --chown=1000 .env /app/.env COPY --chown=1000 entrypoint.sh /app/entrypoint.sh COPY --chown=1000 package.json /app/package.json COPY --chown=1000 package-lock.json /app/package-lock.json RUN chmod +x /app/entrypoint.sh FROM node:24 AS builder WORKDIR /app COPY --link --chown=1000 package-lock.json package.json ./ ARG APP_BASE= ARG PUBLIC_APP_COLOR= ENV BODY_SIZE_LIMIT=15728640 RUN --mount=type=cache,target=/app/.npm \ npm set cache /app/.npm && \ npm ci COPY --link --chown=1000 . . 
RUN git config --global --add safe.directory /app && \ npm run build # mongo image FROM mongo:7 AS mongo # image to be used if INCLUDE_DB is false FROM base AS local_db_false # image to be used if INCLUDE_DB is true FROM base AS local_db_true # copy mongo from the other stage COPY --from=mongo /usr/bin/mongo* /usr/bin/ ENV MONGODB_URL=mongodb://localhost:27017 USER root RUN mkdir -p /data/db RUN chown -R 1000:1000 /data/db USER user # final image FROM local_db_${INCLUDE_DB} AS final # build arg to determine if the database should be included ARG INCLUDE_DB=false ENV INCLUDE_DB=${INCLUDE_DB} # svelte requires APP_BASE at build time so it must be passed as a build arg ARG APP_BASE= ARG PUBLIC_APP_COLOR= ARG PUBLIC_COMMIT_SHA= ENV PUBLIC_COMMIT_SHA=${PUBLIC_COMMIT_SHA} ENV BODY_SIZE_LIMIT=15728640 #import the build & dependencies COPY --from=builder --chown=1000 /app/build /app/build COPY --from=builder --chown=1000 /app/node_modules /app/node_modules CMD ["/bin/bash", "-c", "/app/entrypoint.sh"] ================================================ FILE: LICENSE ================================================ Copyright 2018- The Hugging Face team. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: PRIVACY.md ================================================ ## Privacy > Last updated: Sep 15, 2025 Basics: - Sign-in: You authenticate with your Hugging Face account. - Conversation history: Stored so you can access past chats; you can delete any conversation at any time from the UI. 🗓 Please also consult huggingface.co's main privacy policy at https://huggingface.co/privacy. To exercise any of your legal privacy rights, please send an email to privacy@huggingface.co. ## Data handling and processing HuggingChat uses Hugging Face’s Inference Providers to access models from multiple partners via a single API. Depending on the model and availability, inference runs with the corresponding provider. - Inference Providers documentation: https://huggingface.co/docs/inference-providers - Security & Compliance: https://huggingface.co/docs/inference-providers/security Security and routing facts - Hugging Face does not store any user data for training purposes. - Hugging Face does not store the request body or the response when routing requests through Hugging Face. - Logs are kept for debugging purposes for up to 30 days, but no user data or tokens are stored in those logs. - Inference Provider routing uses TLS/SSL to encrypt data in transit. - The Hugging Face Hub (which Inference Providers is a feature of) is SOC 2 Type 2 certified. See https://huggingface.co/docs/hub/security. External providers are responsible for their own security and data handling. 
Please consult each provider’s respective security and privacy policies via the Inference Providers documentation linked above. ## Technical details [![chat-ui](https://img.shields.io/github/stars/huggingface/chat-ui)](https://github.com/huggingface/chat-ui) The app is completely open source, and further development takes place on the [huggingface/chat-ui](https://github.com/huggingface/chat-ui) GitHub repo. We're always open to contributions! You can find the production configuration for HuggingChat [here](https://github.com/huggingface/chat-ui/blob/main/chart/env/prod.yaml). HuggingChat connects to the OpenAI‑compatible Inference Providers router at `https://router.huggingface.co/v1` to access models across multiple providers. Provider selection may be automatic or fixed depending on the model configuration. We welcome any feedback on this app: please participate in the public discussion at https://huggingface.co/spaces/huggingchat/chat-ui/discussions ================================================ FILE: README.md ================================================ # Chat UI ![Chat UI repository thumbnail](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/chat-ui/chat-ui-2026.png) A chat interface for LLMs. It is a SvelteKit app and it powers the [HuggingChat app on hf.co/chat](https://huggingface.co/chat). 0. [Quickstart](#quickstart) 1. [Database Options](#database-options) 2. [Launch](#launch) 3. [Optional Docker Image](#optional-docker-image) 4. [Extra parameters](#extra-parameters) 5. [Building](#building) > [!NOTE] > Chat UI only supports OpenAI-compatible APIs via `OPENAI_BASE_URL` and the `/models` endpoint. Provider-specific integrations (legacy `MODELS` env var, GGUF discovery, embeddings, web-search helpers, etc.) are removed, but any service that speaks the OpenAI protocol (llama.cpp server, Ollama, OpenRouter, etc.) will work by default. 
> [!NOTE] > The old version is still available on the [legacy branch](https://github.com/huggingface/chat-ui/tree/legacy) ## Quickstart Chat UI speaks to OpenAI-compatible APIs only. The fastest way to get running is with the Hugging Face Inference Providers router plus your personal Hugging Face access token. **Step 1 – Create `.env.local`:** ```env OPENAI_BASE_URL=https://router.huggingface.co/v1 OPENAI_API_KEY=hf_************************ ``` `OPENAI_API_KEY` can come from any OpenAI-compatible endpoint you plan to call. Pick the combo that matches your setup and drop the values into `.env.local`: | Provider | Example `OPENAI_BASE_URL` | Example key env | | --------------------------------------------- | ---------------------------------- | ----------------------------------------------------------------------- | | Hugging Face Inference Providers router | `https://router.huggingface.co/v1` | `OPENAI_API_KEY=hf_xxx` (or `HF_TOKEN` legacy alias) | | llama.cpp server (`llama.cpp --server --api`) | `http://127.0.0.1:8080/v1` | `OPENAI_API_KEY=sk-local-demo` (any string works; llama.cpp ignores it) | | Ollama (with OpenAI-compatible bridge) | `http://127.0.0.1:11434/v1` | `OPENAI_API_KEY=ollama` | | OpenRouter | `https://openrouter.ai/api/v1` | `OPENAI_API_KEY=sk-or-v1-...` | | Poe | `https://api.poe.com/v1` | `OPENAI_API_KEY=pk_...` | Check the root [`.env` template](./.env) for the full list of optional variables you can override. **Step 2 – Install and launch the dev server:** ```bash git clone https://github.com/huggingface/chat-ui cd chat-ui npm install npm run dev -- --open ``` You now have Chat UI running locally. Open the browser and start chatting. ## Database Options Chat history, users, settings, files, and stats all live in MongoDB. You can point Chat UI at any MongoDB 6/7 deployment. > [!TIP] > For quick local development, you can skip this section. When `MONGODB_URL` is not set, Chat UI falls back to an embedded MongoDB that persists to `./db`. 
### MongoDB Atlas (managed) 1. Create a free cluster at [mongodb.com](https://www.mongodb.com/pricing). 2. Add your IP (or `0.0.0.0/0` for development) to the network access list. 3. Create a database user and copy the connection string. 4. Paste that string into `MONGODB_URL` in `.env.local`. Keep the default `MONGODB_DB_NAME=chat-ui` or change it per environment. Atlas keeps MongoDB off your laptop, which is ideal for teams or cloud deployments. ### Local MongoDB (container) If you prefer to run MongoDB in a container: ```bash docker run -d -p 27017:27017 --name mongo-chatui mongo:latest ``` Then set `MONGODB_URL=mongodb://localhost:27017` in `.env.local`. ## Launch After configuring your environment variables, start Chat UI with: ```bash npm install npm run dev ``` The dev server listens on `http://localhost:5173` by default. Use `npm run build` / `npm run preview` for production builds. ## Optional Docker Image The `chat-ui-db` image bundles MongoDB inside the container: ```bash docker run \ -p 3000:3000 \ -e OPENAI_BASE_URL=https://router.huggingface.co/v1 \ -e OPENAI_API_KEY=hf_*** \ -v chat-ui-data:/data \ ghcr.io/huggingface/chat-ui-db:latest ``` All environment variables accepted in `.env.local` can be provided as `-e` flags. ## Extra parameters ### Theming You can use a few environment variables to customize the look and feel of chat-ui. The defaults are: ```env PUBLIC_APP_NAME=ChatUI PUBLIC_APP_ASSETS=chatui PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone." PUBLIC_APP_DATA_SHARING= ``` - `PUBLIC_APP_NAME` The name used as a title throughout the app. - `PUBLIC_APP_ASSETS` is used to find logos & favicons in `static/$PUBLIC_APP_ASSETS`; current options are `chatui` and `huggingchat`. - `PUBLIC_APP_DATA_SHARING` can be set to 1 to add a toggle in the user settings that lets your users opt in to data sharing with model creators. 
### Models Models are discovered from `${OPENAI_BASE_URL}/models`, and you can optionally override their metadata via the `MODELS` env var (JSON5). Legacy provider‑specific integrations and GGUF discovery are removed. Authorization uses `OPENAI_API_KEY` (preferred). `HF_TOKEN` remains a legacy alias. ### LLM Router (Optional) Chat UI can perform server-side smart routing using [katanemo/Arch-Router-1.5B](https://huggingface.co/katanemo/Arch-Router-1.5B) as the routing model without running a separate router service. The UI exposes a virtual model alias called "Omni" (configurable) that, when selected, chooses the best route/model for each message. - Provide a routes policy JSON via `LLM_ROUTER_ROUTES_PATH`. No sample file ships with this branch, so you must point the variable to a JSON array you create yourself (for example, commit one in your project like `config/routes.chat.json`). Each route entry needs `name`, `description`, `primary_model`, and optional `fallback_models`. - Configure the Arch router selection endpoint with `LLM_ROUTER_ARCH_BASE_URL` (OpenAI-compatible `/chat/completions`) and `LLM_ROUTER_ARCH_MODEL` (e.g. `router/omni`). The Arch call reuses `OPENAI_API_KEY` for auth. - Map `other` to a concrete route via `LLM_ROUTER_OTHER_ROUTE` (default: `casual_conversation`). If Arch selection fails, calls fall back to `LLM_ROUTER_FALLBACK_MODEL`. - Selection timeout can be tuned via `LLM_ROUTER_ARCH_TIMEOUT_MS` (default 10000). - Omni alias configuration: `PUBLIC_LLM_ROUTER_ALIAS_ID` (default `omni`), `PUBLIC_LLM_ROUTER_DISPLAY_NAME` (default `Omni`), and optional `PUBLIC_LLM_ROUTER_LOGO_URL`. When you select Omni in the UI, Chat UI will: - Call the Arch endpoint once (non-streaming) to pick the best route for the last turns. - Emit RouterMetadata immediately (route and actual model used) so the UI can display it. - Stream from the selected model via your configured `OPENAI_BASE_URL`. On errors, it tries route fallbacks. 
Tool and multimodal shortcuts: - Multimodal: If `LLM_ROUTER_ENABLE_MULTIMODAL=true` and the user sends an image, the router bypasses Arch and uses the model specified in `LLM_ROUTER_MULTIMODAL_MODEL`. Route name: `multimodal`. - Tools: If `LLM_ROUTER_ENABLE_TOOLS=true` and the user has at least one MCP server enabled, the router bypasses Arch and uses `LLM_ROUTER_TOOLS_MODEL`. If that model is missing or misconfigured, it falls back to Arch routing. Route name: `agentic`. ### MCP Tools (Optional) Chat UI can call tools exposed by Model Context Protocol (MCP) servers and feed results back to the model using OpenAI function calling. You can preconfigure trusted servers via env, let users add their own, and optionally have the Omni router auto‑select a tools‑capable model. Configure servers (base list for all users): ```env # JSON array of servers: name, url, optional headers MCP_SERVERS=[ {"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp"}, {"name": "Hugging Face MCP Login", "url": "https://hf.co/mcp?login"} ] # Forward the signed-in user's Hugging Face token to the official HF MCP login endpoint # when no Authorization header is set on that server entry. MCP_FORWARD_HF_USER_TOKEN=true ``` Enable router tool path (Omni): - Set `LLM_ROUTER_ENABLE_TOOLS=true` and choose a tools‑capable target with `LLM_ROUTER_TOOLS_MODEL=`. - The target must support OpenAI tools/function calling. Chat UI surfaces a “tools” badge on models that advertise this; you can also force‑enable it per‑model in settings (see below). Use tools in the UI: - Open “MCP Servers” from the top‑right menu or from the `+` menu in the chat input to add servers, toggle them on, and run Health Check. The server card lists available tools. - When a model calls a tool, the message shows a compact “tool” block with parameters, a progress bar while running, and the result (or error). Results are also provided back to the model for follow‑up. 
Per‑model overrides: - In Settings → Model, you can toggle “Tool calling (functions)” and “Multimodal input” per model. These overrides apply even if the provider metadata doesn’t advertise the capability. ## Building To create a production version of your app: ```bash npm run build ``` You can preview the production build with `npm run preview`. > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment. ================================================ FILE: chart/Chart.yaml ================================================ apiVersion: v2 name: chat-ui version: 0.0.1-latest type: application icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg ================================================ FILE: chart/env/dev.yaml ================================================ image: repository: huggingface name: chat-ui #nodeSelector: # role-huggingchat: "true" # #tolerations: # - key: "huggingface.co/huggingchat" # operator: "Equal" # value: "true" # effect: "NoSchedule" serviceAccount: enabled: true create: true name: huggingchat-ephemeral ingress: enabled: false ingressInternal: enabled: true path: "/chat" annotations: external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech" alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck" alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public" alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public" alb.ingress.kubernetes.io/ssl-redirect: "443" alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true" alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30 alb.ingress.kubernetes.io/target-type: "ip" alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da" kubernetes.io/ingress.class: "alb" envVars: TEST: 
"test" COUPLE_SESSION_WITH_COOKIE_NAME: "token" OPENID_SCOPES: "openid profile inference-api read-mcp read-billing" USE_USER_TOKEN: "true" MCP_FORWARD_HF_USER_TOKEN: "true" AUTOMATIC_LOGIN: "false" ADDRESS_HEADER: "X-Forwarded-For" APP_BASE: "/chat" ALLOW_IFRAME: "false" COOKIE_SAMESITE: "lax" COOKIE_SECURE: "true" EXPOSE_API: "true" METRICS_ENABLED: "true" LOG_LEVEL: "debug" NODE_LOG_STRUCTURED_DATA: "true" OPENAI_BASE_URL: "https://router.huggingface.co/v1" PUBLIC_APP_ASSETS: "huggingchat" PUBLIC_APP_NAME: "HuggingChat" PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone" PUBLIC_ORIGIN: "" PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js" TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507" LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1" LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json" LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B" LLM_ROUTER_OTHER_ROUTE: "casual_conversation" LLM_ROUTER_ARCH_TIMEOUT_MS: "10000" LLM_ROUTER_ENABLE_MULTIMODAL: "true" LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3.5-397B-A17B" LLM_ROUTER_ENABLE_TOOLS: "true" LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905" TRANSCRIPTION_MODEL: "openai/whisper-large-v3-turbo" MCP_SERVERS: > [{"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp?tools=web_search_exa,get_code_context_exa,crawling_exa"}, {"name": "Hugging Face", "url": "https://hf.co/mcp?login"}] MCP_TOOL_TIMEOUT_MS: "120000" PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni" PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png" PUBLIC_LLM_ROUTER_ALIAS_ID: "omni" MODELS: > [ { "id": "Qwen/Qwen3.5-9B", "description": "Dense multimodal hybrid with 262K context excelling at reasoning on-device." }, { "id": "CohereLabs/tiny-aya-global", "description": "Tiny multilingual assistant covering 70+ languages for on-device deployment." 
}, { "id": "CohereLabs/tiny-aya-earth", "description": "Regional Aya for African languages with culturally tuned on-device inference." }, { "id": "CohereLabs/tiny-aya-fire", "description": "Regional Aya for South Asian languages with culturally tuned on-device inference." }, { "id": "CohereLabs/tiny-aya-water", "description": "Regional Aya for Asia-Pacific and European multilingual on-device tasks." }, { "id": "Qwen/Qwen3.5-122B-A10B", "description": "Multimodal MoE excelling at agentic tool use with 1M context and 201 languages." }, { "id": "Qwen/Qwen3.5-35B-A3B", "description": "Compact multimodal MoE with hybrid DeltaNet, 1M context, and 201 languages." }, { "id": "Qwen/Qwen3.5-27B", "description": "Dense multimodal hybrid with top-tier reasoning density and 1M context." }, { "id": "Qwen/Qwen3.5-397B-A17B", "description": "Native multimodal MoE with hybrid attention, 1M context, and 201 languages.", "parameters": { "max_tokens": 32768 } }, { "id": "allenai/Olmo-3.1-32B-Think", "description": "Updated Olmo Think with extended RL for stronger math, code, and instruction following." }, { "id": "MiniMaxAI/MiniMax-M2.5", "description": "Frontier 230B MoE agent for top-tier coding, tool calling, and fast inference." }, { "id": "zai-org/GLM-5", "description": "Flagship 745B MoE for agentic reasoning, coding, and creative writing." }, { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship Qwen3 vision-language MoE for visual agents, documents, and GUI automation." }, { "id": "google/gemma-3n-E4B-it", "description": "Mobile-first multimodal Gemma handling text, images, video, and audio on-device." }, { "id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "description": "Hybrid Mamba-Transformer with 128K context and controllable reasoning budget." }, { "id": "mistralai/Mistral-7B-Instruct-v0.2", "description": "Efficient 7B instruction model with 32K context for dialogue and coding." 
}, { "id": "Qwen/Qwen3-Coder-Next-FP8", "description": "FP8 Qwen3-Coder-Next for efficient inference with repository-scale coding agents." }, { "id": "arcee-ai/Trinity-Mini", "description": "Compact US-built MoE for multi-turn agents, tool use, and structured outputs." }, { "id": "Qwen/Qwen3-Coder-Next", "description": "Ultra-sparse coding MoE for repository-scale agents with 256K context." }, { "id": "moonshotai/Kimi-K2.5", "description": "Native multimodal agent with agent swarms for parallel tool orchestration." }, { "id": "allenai/Molmo2-8B", "description": "Open vision-language model excelling at video understanding, pointing, and object tracking." }, { "id": "zai-org/GLM-4.7-Flash", "description": "Fast GLM-4.7 variant optimized for lower latency coding and agents." }, { "id": "zai-org/GLM-4.7", "description": "Flagship GLM MoE for coding, reasoning, and agentic tool use." }, { "id": "zai-org/GLM-4.7-FP8", "description": "FP8 GLM-4.7 for efficient inference with strong coding." }, { "id": "MiniMaxAI/MiniMax-M2.1", "description": "MoE agent model with multilingual coding and fast outputs." }, { "id": "XiaomiMiMo/MiMo-V2-Flash", "description": "Fast MoE reasoning model with speculative decoding for agents." }, { "id": "Qwen/Qwen3-VL-32B-Instruct", "description": "Vision-language Qwen for documents, GUI agents, and visual reasoning." }, { "id": "allenai/Olmo-3.1-32B-Instruct", "description": "Fully open chat model strong at tool use and dialogue." }, { "id": "zai-org/AutoGLM-Phone-9B-Multilingual", "description": "Mobile agent for multilingual Android device automation." }, { "id": "utter-project/EuroLLM-22B-Instruct-2512", "description": "European multilingual model for all EU languages and translation." }, { "id": "dicta-il/DictaLM-3.0-24B-Thinking", "description": "Hebrew-English reasoning model with explicit thinking traces for bilingual QA and logic." 
}, { "id": "EssentialAI/rnj-1-instruct", "description": "8B code and STEM model rivaling larger models on agentic coding, math, and tool use." }, { "id": "MiniMaxAI/MiniMax-M2", "description": "Compact MoE model tuned for fast coding, agentic workflows, and long-context chat." }, { "id": "PrimeIntellect/INTELLECT-3-FP8", "description": "FP8 INTELLECT-3 variant for cheaper frontier-level math, code, and general reasoning." }, { "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "description": "Flagship Qwen3 vision-language model for high-accuracy image, text, and video reasoning." }, { "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "description": "Thinking-mode Qwen3-VL that emits detailed multimodal reasoning traces for difficult problems." }, { "id": "Qwen/Qwen3-VL-8B-Instruct", "description": "Smaller Qwen3 vision-language assistant for everyday multimodal chat, captioning, and analysis." }, { "id": "aisingapore/Qwen-SEA-LION-v4-32B-IT", "description": "SEA-LION v4 Qwen optimized for Southeast Asian languages and regional enterprise workloads." }, { "id": "allenai/Olmo-3-32B-Think", "description": "Fully open 32B thinking model excelling at stepwise math, coding, and research reasoning." }, { "id": "allenai/Olmo-3-7B-Instruct", "description": "Lightweight Olmo assistant for instruction following, Q&A, and everyday open-source workflows." }, { "id": "allenai/Olmo-3-7B-Think", "description": "7B Olmo reasoning model delivering transparent multi-step thinking on modest hardware." }, { "id": "deepcogito/cogito-671b-v2.1", "description": "Frontier-scale 671B MoE focused on deep reasoning, math proofs, and complex coding." }, { "id": "deepcogito/cogito-671b-v2.1-FP8", "description": "FP8 Cogito v2.1 making 671B-scale reasoning more affordable to serve and experiment with." }, { "id": "deepseek-ai/DeepSeek-V3.2", "description": "Latest DeepSeek agent model combining strong reasoning, tool-use, and efficient long-context inference." 
}, { "id": "moonshotai/Kimi-K2-Thinking", "description": "Reasoning-focused Kimi K2 variant for deep chain-of-thought and large agentic tool flows." }, { "id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2", "description": "NVIDIA Nano 12B general assistant for coding, chat, and agents with efficient deployment." }, { "id": "ServiceNow-AI/Apriel-1.6-15b-Thinker", "description": "15B multimodal reasoning model with efficient thinking for enterprise and coding tasks." }, { "id": "openai/gpt-oss-safeguard-20b", "description": "Safety-focused gpt-oss variant for content classification, policy enforcement, and LLM output filtering." }, { "id": "zai-org/GLM-4.5", "description": "Flagship GLM agent model unifying advanced reasoning, coding, and tool-using capabilities." }, { "id": "zai-org/GLM-4.5V-FP8", "description": "FP8 vision-language GLM-4.5V for efficient multilingual visual QA, understanding, and hybrid reasoning." }, { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." }, { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." }, { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." }, { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." }, { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." }, { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." 
}, { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." }, { "id": "zai-org/GLM-4.6V", "description": "106B vision-language model with 128K context and native tool calling for multimodal agents.", "parameters": { "max_tokens": 8192 } }, { "id": "zai-org/GLM-4.6V-Flash", "description": "9B lightweight vision model for fast local inference with tool calling and UI understanding." }, { "id": "zai-org/GLM-4.6V-FP8", "description": "FP8-quantized GLM-4.6V for efficient multimodal deployment with native tool use." }, { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." }, { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." }, { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." }, { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." }, { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." }, { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." }, { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." }, { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." }, { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." }, { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." 
}, { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." }, { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." }, { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." }, { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." }, { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." }, { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." }, { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." }, { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." }, { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." }, { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." }, { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." }, { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." }, { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." }, { "id": "zai-org/GLM-4.6", "description": "Hybrid reasoning model top choice for intelligent agent applications." }, { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." 
}, { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." }, { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." }, { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." }, { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." }, { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." }, { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." }, { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." }, { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." }, { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." }, { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." }, { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." }, { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." }, { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." }, { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." }, { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." 
}, { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." }, { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." }, { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." }, { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." }, { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." }, { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." }, { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." }, { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." }, { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." }, { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." }, { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } }, { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." }, { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." 
}, { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." }, { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." }, { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." }, { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." }, { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." }, { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." }, { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." }, { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." }, { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." }, { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." }, { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." }, { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." }, { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." }, { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." 
}, { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." }, { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." }, { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." }, { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." }, { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." }, { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." }, { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." }, { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." }, { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." }, { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." }, { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." }, { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." }, { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." }, { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." }, { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." 
}, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." }, { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." }, { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." }, { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." }, { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." }, { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." }, { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." }, { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." }, { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." }, { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." }, { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." }, { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." }, { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." }, { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." 
}, { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." }, { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." }, { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." }, { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." }, { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." }, { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." } ] infisical: enabled: true env: "ephemeral-us-east-1" replicas: 1 autoscaling: enabled: false resources: requests: cpu: 2 memory: 4Gi limits: cpu: 4 memory: 8Gi ================================================ FILE: chart/env/prod.yaml ================================================ image: repository: huggingface name: chat-ui nodeSelector: role-huggingchat: "true" tolerations: - key: "huggingface.co/huggingchat" operator: "Equal" value: "true" effect: "NoSchedule" serviceAccount: enabled: true create: true name: huggingchat-prod ingress: path: "/chat" annotations: alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck" alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" alb.ingress.kubernetes.io/load-balancer-name: "hub-utils-prod-cloudfront" alb.ingress.kubernetes.io/group.name: "hub-utils-prod-cloudfront" alb.ingress.kubernetes.io/scheme: "internal" alb.ingress.kubernetes.io/ssl-redirect: "443" alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true" alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30 alb.ingress.kubernetes.io/target-type: "ip" alb.ingress.kubernetes.io/certificate-arn: 
"arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91" kubernetes.io/ingress.class: "alb" ingressInternal: enabled: true path: "/chat" annotations: alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck" alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]" alb.ingress.kubernetes.io/group.name: "hub-prod-internal-public" alb.ingress.kubernetes.io/load-balancer-name: "hub-prod-internal-public" alb.ingress.kubernetes.io/ssl-redirect: "443" alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true" alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30 alb.ingress.kubernetes.io/target-type: "ip" alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/5b25b145-75db-4837-b9f3-7f238ba8a9c7,arn:aws:acm:us-east-1:707930574880:certificate/bfdf509c-f44b-400f-b9e1-6f7a861abe91" kubernetes.io/ingress.class: "alb" envVars: COUPLE_SESSION_WITH_COOKIE_NAME: "token" OPENID_SCOPES: "openid profile inference-api read-mcp read-billing" USE_USER_TOKEN: "true" MCP_FORWARD_HF_USER_TOKEN: "true" AUTOMATIC_LOGIN: "false" ADDRESS_HEADER: "X-Forwarded-For" APP_BASE: "/chat" ALLOW_IFRAME: "false" COOKIE_SAMESITE: "lax" COOKIE_SECURE: "true" EXPOSE_API: "true" METRICS_ENABLED: "true" LOG_LEVEL: "debug" NODE_LOG_STRUCTURED_DATA: "true" OPENAI_BASE_URL: "https://router.huggingface.co/v1" PUBLIC_APP_ASSETS: "huggingchat" PUBLIC_APP_NAME: "HuggingChat" PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone" PUBLIC_ORIGIN: "https://huggingface.co" PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js" TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507" LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1" LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json" LLM_ROUTER_ARCH_MODEL: 
"katanemo/Arch-Router-1.5B" LLM_ROUTER_OTHER_ROUTE: "casual_conversation" LLM_ROUTER_ARCH_TIMEOUT_MS: "10000" LLM_ROUTER_ENABLE_MULTIMODAL: "true" LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3.5-397B-A17B" LLM_ROUTER_ENABLE_TOOLS: "true" LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905" TRANSCRIPTION_MODEL: "openai/whisper-large-v3-turbo" MCP_SERVERS: > [{"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp?tools=web_search_exa,get_code_context_exa,crawling_exa"}, {"name": "Hugging Face", "url": "https://hf.co/mcp?login"}] MCP_TOOL_TIMEOUT_MS: "120000" PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni" PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png" PUBLIC_LLM_ROUTER_ALIAS_ID: "omni" MODELS: > [ { "id": "Qwen/Qwen3.5-9B", "description": "Dense multimodal hybrid with 262K context excelling at reasoning on-device." }, { "id": "CohereLabs/tiny-aya-global", "description": "Tiny multilingual assistant covering 70+ languages for on-device deployment." }, { "id": "CohereLabs/tiny-aya-earth", "description": "Regional Aya for African languages with culturally tuned on-device inference." }, { "id": "CohereLabs/tiny-aya-fire", "description": "Regional Aya for South Asian languages with culturally tuned on-device inference." }, { "id": "CohereLabs/tiny-aya-water", "description": "Regional Aya for Asia-Pacific and European multilingual on-device tasks." }, { "id": "Qwen/Qwen3.5-122B-A10B", "description": "Multimodal MoE excelling at agentic tool use with 1M context and 201 languages." }, { "id": "Qwen/Qwen3.5-35B-A3B", "description": "Compact multimodal MoE with hybrid DeltaNet, 1M context, and 201 languages." }, { "id": "Qwen/Qwen3.5-27B", "description": "Dense multimodal hybrid with top-tier reasoning density and 1M context." 
}, { "id": "Qwen/Qwen3.5-397B-A17B", "description": "Native multimodal MoE with hybrid attention, 1M context, and 201 languages.", "parameters": { "max_tokens": 32768 } }, { "id": "allenai/Olmo-3.1-32B-Think", "description": "Updated Olmo Think with extended RL for stronger math, code, and instruction following." }, { "id": "MiniMaxAI/MiniMax-M2.5", "description": "Frontier 230B MoE agent for top-tier coding, tool calling, and fast inference." }, { "id": "zai-org/GLM-5", "description": "Flagship 745B MoE for agentic reasoning, coding, and creative writing." }, { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship Qwen3 vision-language MoE for visual agents, documents, and GUI automation." }, { "id": "google/gemma-3n-E4B-it", "description": "Mobile-first multimodal Gemma handling text, images, video, and audio on-device." }, { "id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "description": "Hybrid Mamba-Transformer with 128K context and controllable reasoning budget." }, { "id": "mistralai/Mistral-7B-Instruct-v0.2", "description": "Efficient 7B instruction model with 32K context for dialogue and coding." }, { "id": "Qwen/Qwen3-Coder-Next-FP8", "description": "FP8 Qwen3-Coder-Next for efficient inference with repository-scale coding agents." }, { "id": "arcee-ai/Trinity-Mini", "description": "Compact US-built MoE for multi-turn agents, tool use, and structured outputs." }, { "id": "Qwen/Qwen3-Coder-Next", "description": "Ultra-sparse coding MoE for repository-scale agents with 256K context." }, { "id": "moonshotai/Kimi-K2.5", "description": "Native multimodal agent with agent swarms for parallel tool orchestration." }, { "id": "allenai/Molmo2-8B", "description": "Open vision-language model excelling at video understanding, pointing, and object tracking." }, { "id": "zai-org/GLM-4.7-Flash", "description": "Fast GLM-4.7 variant optimized for lower latency coding and agents." 
}, { "id": "zai-org/GLM-4.7", "description": "Flagship GLM MoE for coding, reasoning, and agentic tool use." }, { "id": "zai-org/GLM-4.7-FP8", "description": "FP8 GLM-4.7 for efficient inference with strong coding." }, { "id": "MiniMaxAI/MiniMax-M2.1", "description": "MoE agent model with multilingual coding and fast outputs." }, { "id": "XiaomiMiMo/MiMo-V2-Flash", "description": "Fast MoE reasoning model with speculative decoding for agents." }, { "id": "Qwen/Qwen3-VL-32B-Instruct", "description": "Vision-language Qwen for documents, GUI agents, and visual reasoning." }, { "id": "allenai/Olmo-3.1-32B-Instruct", "description": "Fully open chat model strong at tool use and dialogue." }, { "id": "zai-org/AutoGLM-Phone-9B-Multilingual", "description": "Mobile agent for multilingual Android device automation." }, { "id": "utter-project/EuroLLM-22B-Instruct-2512", "description": "European multilingual model for all EU languages and translation." }, { "id": "dicta-il/DictaLM-3.0-24B-Thinking", "description": "Hebrew-English reasoning model with explicit thinking traces for bilingual QA and logic." }, { "id": "EssentialAI/rnj-1-instruct", "description": "8B code and STEM model rivaling larger models on agentic coding, math, and tool use." }, { "id": "MiniMaxAI/MiniMax-M2", "description": "Compact MoE model tuned for fast coding, agentic workflows, and long-context chat." }, { "id": "PrimeIntellect/INTELLECT-3-FP8", "description": "FP8 INTELLECT-3 variant for cheaper frontier-level math, code, and general reasoning." }, { "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "description": "Flagship Qwen3 vision-language model for high-accuracy image, text, and video reasoning." }, { "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "description": "Thinking-mode Qwen3-VL that emits detailed multimodal reasoning traces for difficult problems." }, { "id": "Qwen/Qwen3-VL-8B-Instruct", "description": "Smaller Qwen3 vision-language assistant for everyday multimodal chat, captioning, and analysis." 
}, { "id": "aisingapore/Qwen-SEA-LION-v4-32B-IT", "description": "SEA-LION v4 Qwen optimized for Southeast Asian languages and regional enterprise workloads." }, { "id": "allenai/Olmo-3-32B-Think", "description": "Fully open 32B thinking model excelling at stepwise math, coding, and research reasoning." }, { "id": "allenai/Olmo-3-7B-Instruct", "description": "Lightweight Olmo assistant for instruction following, Q&A, and everyday open-source workflows." }, { "id": "allenai/Olmo-3-7B-Think", "description": "7B Olmo reasoning model delivering transparent multi-step thinking on modest hardware." }, { "id": "deepcogito/cogito-671b-v2.1", "description": "Frontier-scale 671B MoE focused on deep reasoning, math proofs, and complex coding." }, { "id": "deepcogito/cogito-671b-v2.1-FP8", "description": "FP8 Cogito v2.1 making 671B-scale reasoning more affordable to serve and experiment with." }, { "id": "deepseek-ai/DeepSeek-V3.2", "description": "Latest DeepSeek agent model combining strong reasoning, tool-use, and efficient long-context inference." }, { "id": "moonshotai/Kimi-K2-Thinking", "description": "Reasoning-focused Kimi K2 variant for deep chain-of-thought and large agentic tool flows." }, { "id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2", "description": "NVIDIA Nano 12B general assistant for coding, chat, and agents with efficient deployment." }, { "id": "ServiceNow-AI/Apriel-1.6-15b-Thinker", "description": "15B multimodal reasoning model with efficient thinking for enterprise and coding tasks." }, { "id": "openai/gpt-oss-safeguard-20b", "description": "Safety-focused gpt-oss variant for content classification, policy enforcement, and LLM output filtering." }, { "id": "zai-org/GLM-4.5", "description": "Flagship GLM agent model unifying advanced reasoning, coding, and tool-using capabilities." }, { "id": "zai-org/GLM-4.5V-FP8", "description": "FP8 vision-language GLM-4.5V for efficient multilingual visual QA, understanding, and hybrid reasoning." 
}, { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." }, { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." }, { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." }, { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." }, { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." }, { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." }, { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." }, { "id": "zai-org/GLM-4.6V", "description": "106B vision-language model with 128K context and native tool calling for multimodal agents.", "parameters": { "max_tokens": 8192 } }, { "id": "zai-org/GLM-4.6V-Flash", "description": "9B lightweight vision model for fast local inference with tool calling and UI understanding." }, { "id": "zai-org/GLM-4.6V-FP8", "description": "FP8-quantized GLM-4.6V for efficient multimodal deployment with native tool use." }, { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." }, { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." }, { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." 
}, { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." }, { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." }, { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." }, { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." }, { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." }, { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." }, { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." }, { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." }, { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." }, { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." }, { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." }, { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." }, { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." }, { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." }, { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." 
}, { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." }, { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." }, { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." }, { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." }, { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." }, { "id": "zai-org/GLM-4.6", "description": "Hybrid reasoning model top choice for intelligent agent applications." }, { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." }, { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." }, { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." }, { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." }, { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." }, { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." }, { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." }, { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." }, { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." }, { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." 
}, { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." }, { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." }, { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." }, { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." }, { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." }, { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." }, { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." }, { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." }, { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." }, { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." }, { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." }, { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." 
}, { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." }, { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." }, { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." }, { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." }, { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } }, { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." }, { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." }, { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." }, { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." }, { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." }, { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." }, { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." }, { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." }, { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." }, { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." 
}, { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." }, { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." }, { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." }, { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." }, { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." }, { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." }, { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." }, { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." }, { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." }, { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." }, { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." }, { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." }, { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." }, { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." 
}, { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." }, { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." }, { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." }, { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." }, { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." }, { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." }, { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." }, { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." }, { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." }, { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." }, { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." }, { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." }, { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." }, { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." 
}, { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." }, { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." }, { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." }, { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." }, { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." }, { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." }, { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." }, { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." }, { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." }, { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." }, { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." }, { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." 
} ]

# Renders templates/infisical.yaml; `env` becomes the InfisicalSecret's envSlug.
infisical:
  enabled: true
  env: "prod-us-east-1"

# Horizontal autoscaling (templates/hpa.yaml). While enabled, the Deployment
# template omits its fixed `replicas` field. The utilization targets are quoted
# strings that the HPA template converts with `int`.
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 30
  targetMemoryUtilizationPercentage: "50"
  targetCPUUtilizationPercentage: "50"

# Container requests/limits, passed to the Deployment through toYaml.
resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 4
    memory: 8Gi

================================================
FILE: chart/templates/_helpers.tpl
================================================
{{- /*
name: the release name, cut to 63 characters with any trailing "-" stripped
(presumably to stay within the Kubernetes 63-character name limit).
NOTE(review): `default` receives a single argument here, so it appears to pass
$.Release.Name through unchanged — confirm no override value was intended.
*/ -}}
{{- define "name" -}}
{{- default $.Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{- /* app.name: the fixed application name used for the `app` label. */ -}}
{{- define "app.name" -}}
chat-ui
{{- end -}}

{{- /*
labels.standard: release / heritage / chart / app labels.
The chart's templates use this block both as metadata labels and as selector
labels (Deployment, Service, ServiceMonitor, NetworkPolicy), so changing it
changes what those selectors match.
*/ -}}
{{- define "labels.standard" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}"
{{- end -}}

{{- /* labels.resolver: same as labels.standard, with `app` suffixed "-resolver". */ -}}
{{- define "labels.resolver" -}}
release: {{ $.Release.Name | quote }}
heritage: {{ $.Release.Service | quote }}
chart: "{{ include "name" . }}"
app: "{{ include "app.name" . }}-resolver"
{{- end -}}

================================================
FILE: chart/templates/config.yaml
================================================
{{- /*
ConfigMap with one key per entry of .Values.envVars. Every value goes through
`quote`, so all data values are rendered as strings. The Deployment loads this
ConfigMap with envFrom and hashes this rendered file into checksum/config.
*/ -}}
apiVersion: v1
kind: ConfigMap
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
data:
  {{- range $key, $value := $.Values.envVars }}
  {{ $key }}: {{ $value | quote }}
  {{- end }}

================================================
FILE: chart/templates/deployment.yaml
================================================
{{- /* Deployment running the single chat-ui container. */ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
  {{- /* The auto-reload annotation is only added when Infisical secrets are enabled. */}}
  {{- if .Values.infisical.enabled }}
  annotations:
    secrets.infisical.com/auto-reload: "true"
  {{- end }}
spec:
  progressDeadlineSeconds: 600
  {{- /* `replicas` is left out when autoscaling is enabled (hpa.yaml is rendered in that case). */}}
  {{- if not $.Values.autoscaling.enabled }}
  replicas: {{ .Values.replicas }}
  {{- end }}
  revisionHistoryLimit: 10
  selector:
    matchLabels: {{ include "labels.standard" .
| nindent 6 }}
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels: {{ include "labels.standard" . | nindent 8 }}
      annotations:
        {{- /*
        SHA-256 of the rendered config.yaml: the pod template changes whenever
        the ConfigMap content changes.
        */}}
        checksum/config: {{ include (print $.Template.BasePath "/config.yaml") . | sha256sum }}
        {{- /* Added whenever envVars.NODE_LOG_STRUCTURED_DATA is set to a non-empty value. */}}
        {{- if $.Values.envVars.NODE_LOG_STRUCTURED_DATA }}
        co.elastic.logs/json.expand_keys: "true"
        {{- end }}
    spec:
      {{- /* Falls back to the release-derived name when serviceAccount.name is empty. */}}
      {{- if .Values.serviceAccount.enabled }}
      serviceAccountName: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
      {{- end }}
      containers:
        - name: chat-ui
          image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          {{- /*
          Readiness and liveness use the same check: GET <APP_BASE>/healthcheck
          on APP_PORT (3000 when unset), every 10s, 30 failures tolerated.
          */}}
          readinessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
          livenessProbe:
            failureThreshold: 30
            periodSeconds: 10
            httpGet:
              path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck
              port: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
          ports:
            - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
              name: http
              protocol: TCP
            {{- /* The metrics port exists only when METRICS_ENABLED is exactly the string "true". */}}
            {{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
            - containerPort: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
              name: metrics
              protocol: TCP
            {{- end }}
          resources: {{ toYaml .Values.resources | nindent 12 }}
          {{- with $.Values.extraEnv }}
          env:
              {{- toYaml . | nindent 14 }}
          {{- end }}
          {{- /*
          Environment sources, in order: the chart's ConfigMap, the Secret
          "<name>-secs" (the managed secret named in infisical.yaml) when
          Infisical is enabled, then any extraEnvFrom entries.
          NOTE(review): extraEnvFrom is rendered with nindent 14, so the static
          entries must sit at that same column for the list to parse — confirm
          against the real file's layout.
          */}}
          envFrom:
              - configMapRef:
                  name: {{ include "name" . }}
              {{- if $.Values.infisical.enabled }}
              - secretRef:
                  name: {{ include "name" $ }}-secs
              {{- end }}
              {{- with $.Values.extraEnvFrom }}
              {{- toYaml . | nindent 14 }}
              {{- end }}
      nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }}
      tolerations: {{ toYaml .Values.tolerations | nindent 8 }}
      {{- /* NOTE(review): no volumeMounts in the container above reference this volume. */}}
      volumes:
        - name: config
          configMap:
            name: {{ include "name" .
}}

================================================
FILE: chart/templates/hpa.yaml
================================================
{{- /*
HorizontalPodAutoscaler for the chart's Deployment; rendered only when
autoscaling.enabled is true.
*/ -}}
{{- if $.Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "name" . }}
  minReplicas: {{ $.Values.autoscaling.minReplicas }}
  maxReplicas: {{ $.Values.autoscaling.maxReplicas }}
  metrics:
    {{- /*
    A target is emitted only when its value is set. Testing truthiness (rather
    than comparing against "") skips "" exactly as before, and additionally
    accepts targets written as plain integers, for which a string comparison
    aborts rendering with "incompatible types for comparison".
    */}}
    {{- if $.Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ $.Values.autoscaling.targetMemoryUtilizationPercentage | int }}
    {{- end }}
    {{- if $.Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ $.Values.autoscaling.targetCPUUtilizationPercentage | int }}
    {{- end }}
  behavior:
    {{- /* Scale down: 600s stabilization window, at most 10% of pods per 60s. */}}
    scaleDown:
      stabilizationWindowSeconds: 600
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    {{- /* Scale up: no stabilization window, at most 1 pod per 30s. */}}
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Pods
          value: 1
          periodSeconds: 30
{{- end }}

================================================
FILE: chart/templates/infisical.yaml
================================================
{{- /*
InfisicalSecret "<name>-infisical-secret"; rendered only when
infisical.enabled is true. Credentials come from the operator secret named in
values, and the scope is the configured env/project at secretsPath "/".
*/ -}}
{{- if .Values.infisical.enabled }}
apiVersion: secrets.infisical.com/v1alpha1
kind: InfisicalSecret
metadata:
  name: {{ include "name" $ }}-infisical-secret
  namespace: {{ $.Release.Namespace }}
spec:
  authentication:
    universalAuth:
      credentialsRef:
        secretName: {{ .Values.infisical.operatorSecretName | quote }}
        secretNamespace: {{ .Values.infisical.operatorSecretNamespace | quote }}
      secretsScope:
        envSlug: {{ .Values.infisical.env | quote }}
        projectSlug: {{ .Values.infisical.project | quote }}
        secretsPath: /
  hostAPI: {{ .Values.infisical.url | quote }}
  managedSecretReference:
    creationPolicy: Owner
    secretName: {{ include "name" $
}}-secs
    secretNamespace: {{ .Release.Namespace | quote }}
    secretType: Opaque
  resyncInterval: {{ .Values.infisical.resyncInterval }}
{{- end }}

================================================
FILE: chart/templates/ingress-internal.yaml
================================================
{{- /*
Second Ingress, named "<name>-internal", configured from .Values.ingressInternal;
rendered only when ingressInternal.enabled is true. It routes .Values.domain at
ingressInternal.path (default "/") to the chart's Service port "http".
*/ -}}
{{- if $.Values.ingressInternal.enabled }}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingressInternal.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}-internal
  namespace: {{ .Release.Namespace }}
spec:
  {{ if $.Values.ingressInternal.className }}
  ingressClassName: {{ .Values.ingressInternal.className }}
  {{ end }}
  {{- /*
  Inside `with`, "." is the tls map, so root values are reached through "$".
  An empty tls map skips the whole section.
  */}}
  {{- with .Values.ingressInternal.tls }}
  tls:
    - hosts:
      - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" . }}
                port:
                  name: http
            path: {{ $.Values.ingressInternal.path | default "/" }}
            pathType: Prefix
{{- end }}

================================================
FILE: chart/templates/ingress.yaml
================================================
{{- /*
Primary Ingress, configured from .Values.ingress; rendered only when
ingress.enabled is true. It routes .Values.domain at ingress.path
(default "/") to the chart's Service port "http".
*/ -}}
{{- if $.Values.ingress.enabled }}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  {{ if $.Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{ end }}
  {{- /*
  Inside `with`, "." is the tls map, so root values are reached through "$".
  An empty tls map skips the whole section.
  */}}
  {{- with .Values.ingress.tls }}
  tls:
    - hosts:
      - {{ $.Values.domain | quote }}
      {{- with .secretName }}
      secretName: {{ . }}
      {{- end }}
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:
        paths:
          - backend:
              service:
                name: {{ include "name" .
}}
                port:
                  name: http
            path: {{ $.Values.ingress.path | default "/" }}
            pathType: Prefix
{{- end }}

================================================
FILE: chart/templates/network-policy.yaml
================================================
{{- /*
Egress-only NetworkPolicy for the pods carrying labels.standard; rendered only
when networkPolicy.enabled is true.
*/ -}}
{{- if $.Values.networkPolicy.enabled }}
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  egress:
    {{- /* Rule 1: DNS over UDP/53 to pods labelled k8s-app=kube-dns in kube-system. */}}
    - ports:
        - port: 53
          protocol: UDP
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    {{- /*
    Rule 2: explicitly allowed CIDR blocks. The rule is emitted only when the
    list is non-empty: an egress rule whose `to` is empty or missing matches
    ALL destinations, so rendering a bare `- to:` for the default
    `allowedBlocks: []` would allow all egress and make the private-range
    exceptions in rule 3 ineffective.
    */}}
    {{- if .Values.networkPolicy.allowedBlocks }}
    - to:
        {{- range $ip := .Values.networkPolicy.allowedBlocks }}
        - ipBlock:
            cidr: {{ $ip | quote }}
        {{- end }}
    {{- end }}
    {{- /*
    Rule 3: any IPv4 destination except the RFC 1918 private ranges and
    169.254.169.254 (commonly the cloud instance-metadata address).
    */}}
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16
              - 169.254.169.254/32
  podSelector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  policyTypes:
    - Egress
{{- end }}

================================================
FILE: chart/templates/service-account.yaml
================================================
{{- /*
ServiceAccount; rendered only when serviceAccount.enabled AND
serviceAccount.create are both true. The name falls back to the
release-derived name when serviceAccount.name is empty, matching the
serviceAccountName expression used by the Deployment.
*/ -}}
{{- if and .Values.serviceAccount.enabled .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
metadata:
  name: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.standard" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}

================================================
FILE: chart/templates/service-monitor.yaml
================================================
{{- /*
ServiceMonitor; rendered only when envVars.METRICS_ENABLED is exactly the
string "true". It selects Services by labels.standard.
*/ -}}
{{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  selector:
    matchLabels: {{ include "labels.standard" .
| nindent 6 }} endpoints: - port: metrics path: /metrics interval: 10s scheme: http scrapeTimeout: 10s {{- end }} ================================================ FILE: chart/templates/service.yaml ================================================ apiVersion: v1 kind: Service metadata: name: "{{ include "name" . }}" annotations: {{ toYaml .Values.service.annotations | nindent 4 }} namespace: {{ .Release.Namespace }} labels: {{ include "labels.standard" . | nindent 4 }} spec: ports: - name: http port: 80 protocol: TCP targetPort: http {{- if eq "true" $.Values.envVars.METRICS_ENABLED }} - name: metrics port: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }} protocol: TCP targetPort: metrics {{- end }} selector: {{ include "labels.standard" . | nindent 4 }} type: {{.Values.service.type}} ================================================ FILE: chart/values.yaml ================================================ image: repository: ghcr.io/huggingface name: chat-ui tag: 0.0.0-latest pullPolicy: IfNotPresent replicas: 3 domain: huggingface.co networkPolicy: enabled: false allowedBlocks: [] service: type: NodePort annotations: { } serviceAccount: enabled: false create: false name: "" automountServiceAccountToken: true annotations: { } ingress: enabled: true path: "/" annotations: { } # className: "nginx" tls: { } # secretName: XXX ingressInternal: enabled: false path: "/" annotations: { } # className: "nginx" tls: { } resources: requests: cpu: 2 memory: 4Gi limits: cpu: 2 memory: 4Gi nodeSelector: {} tolerations: [] envVars: { } infisical: enabled: false env: "" project: "huggingchat-v2-a1" url: "" resyncInterval: 60 operatorSecretName: "huggingchat-operator-secrets" operatorSecretNamespace: "hub-utils" # Allow environment injections on top of or instead of Infisical extraEnvFrom: [] extraEnv: [] autoscaling: enabled: false minReplicas: 1 maxReplicas: 2 targetMemoryUtilizationPercentage: "" targetCPUUtilizationPercentage: "" ## Metrics removed; monitoring configuration
no longer used ================================================ FILE: docker-compose.yml ================================================ # For development only # Set MONGODB_URL=mongodb://localhost:27017 in .env.local to use this container services: mongo: image: mongo:8 hostname: mongodb ports: - ${LOCAL_MONGO_PORT:-27017}:27017 command: --replSet rs0 --bind_ip_all #--setParameter notablescan=1 mem_limit: "5g" mem_reservation: "3g" healthcheck: # need to specify the hostname here because the default is the container name, and we run the app outside of docker test: test $$(mongosh --quiet --eval 'try {rs.status().ok} catch(e) {rs.initiate({_id:"rs0",members:[{_id:0,host:"127.0.0.1:${LOCAL_MONGO_PORT:-27017}"}]}).ok}') -eq 1 interval: 5s volumes: - mongodb-data:/data/db restart: always volumes: mongodb-data: ================================================ FILE: docs/source/_toctree.yml ================================================ - local: index title: Chat UI - title: Installation sections: - local: installation/local title: Local - local: installation/docker title: Docker - local: installation/helm title: Helm - title: Configuration sections: - local: configuration/overview title: Overview - local: configuration/theming title: Theming - local: configuration/open-id title: OpenID - local: configuration/mcp-tools title: MCP Tools - local: configuration/llm-router title: LLM Router - local: configuration/metrics title: Metrics - local: configuration/common-issues title: Common Issues - title: Developing sections: - local: developing/architecture title: Architecture ================================================ FILE: docs/source/configuration/common-issues.md ================================================ # Common Issues ## 403: You don't have access to this conversation This usually happens when running Chat UI over HTTP without proper cookie configuration. **Recommended:** Set up a reverse proxy (NGINX, Caddy) to handle HTTPS. 
**Alternative:** If you must run over HTTP, configure cookies: ```ini COOKIE_SECURE=false COOKIE_SAMESITE=lax ``` Also ensure `PUBLIC_ORIGIN` matches your actual URL: ```ini PUBLIC_ORIGIN=http://localhost:5173 ``` ## Models not loading If models aren't appearing in the UI: 1. Verify `OPENAI_BASE_URL` is correct and accessible 2. Check that `OPENAI_API_KEY` is valid 3. Ensure the endpoint returns models at `${OPENAI_BASE_URL}/models` ## Database connection errors For development, you can skip MongoDB entirely - Chat UI will use an embedded database. For production, verify: - `MONGODB_URL` is a valid connection string - Your IP is whitelisted (for MongoDB Atlas) - The database user has read/write permissions ================================================ FILE: docs/source/configuration/llm-router.md ================================================ # LLM Router Chat UI includes an intelligent routing system that automatically selects the best model for each request. When enabled, users see a virtual "Omni" model that routes to specialized models based on the conversation context. The router uses [katanemo/Arch-Router-1.5B](https://huggingface.co/katanemo/Arch-Router-1.5B) for route selection. ## Configuration ### Basic Setup ```ini # Arch router endpoint (OpenAI-compatible) LLM_ROUTER_ARCH_BASE_URL=https://router.huggingface.co/v1 LLM_ROUTER_ARCH_MODEL=katanemo/Arch-Router-1.5B # Path to your routes policy JSON LLM_ROUTER_ROUTES_PATH=./config/routes.json ``` ### Routes Policy Create a JSON file defining your routes. 
Each route specifies a `name`, a `description`, a `primary_model`, and optionally a list of `fallback_models`: ```json [ { "name": "coding", "description": "Programming, debugging, code review", "primary_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "fallback_models": ["meta-llama/Llama-3.3-70B-Instruct"] }, { "name": "casual_conversation", "description": "General chat, questions, explanations", "primary_model": "meta-llama/Llama-3.3-70B-Instruct" } ] ``` ### Fallback Behavior ```ini # Route to use when Arch returns "other" LLM_ROUTER_OTHER_ROUTE=casual_conversation # Model to use if Arch selection fails entirely LLM_ROUTER_FALLBACK_MODEL=meta-llama/Llama-3.3-70B-Instruct # Selection timeout (milliseconds) LLM_ROUTER_ARCH_TIMEOUT_MS=10000 ``` ## Multimodal Routing When a user sends an image, the router can bypass Arch and route directly to a vision model: ```ini LLM_ROUTER_ENABLE_MULTIMODAL=true LLM_ROUTER_MULTIMODAL_MODEL=meta-llama/Llama-3.2-90B-Vision-Instruct ``` ## Tools Routing When a user has MCP servers enabled, the router can automatically select a tools-capable model: ```ini LLM_ROUTER_ENABLE_TOOLS=true LLM_ROUTER_TOOLS_MODEL=meta-llama/Llama-3.3-70B-Instruct ``` ## UI Customization Customize how the router appears in the model selector: ```ini PUBLIC_LLM_ROUTER_ALIAS_ID=omni PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni PUBLIC_LLM_ROUTER_LOGO_URL=https://example.com/logo.png ``` ## How It Works When a user selects Omni: 1. Chat UI sends the conversation context to the Arch router 2. Arch analyzes the content and returns a route name 3. Chat UI maps the route to the corresponding model 4. The request streams from the selected model 5. On errors, fallback models are tried in order The route selection is displayed in the UI so users can see which model was chosen.
## Message Length Limits To optimize router performance, message content is trimmed before sending to Arch: ```ini # Max characters for assistant messages (default: 500) LLM_ROUTER_MAX_ASSISTANT_LENGTH=500 # Max characters for previous user messages (default: 400) LLM_ROUTER_MAX_PREV_USER_LENGTH=400 ``` The latest user message is never trimmed. ================================================ FILE: docs/source/configuration/mcp-tools.md ================================================ # MCP Tools Chat UI supports tool calling via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/). MCP servers expose tools that models can invoke during conversations. ## Server Types Chat UI supports two types of MCP servers: ### Base Servers (Admin-configured) Base servers are configured by the administrator via environment variables. They appear for all users and can be enabled/disabled per-user but not removed. ```ini MCP_SERVERS=[ {"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp"}, {"name": "Hugging Face", "url": "https://hf.co/mcp"} ] ``` Each server entry requires: - `name` - Display name shown in the UI - `url` - MCP server endpoint URL - `headers` (optional) - Custom headers for authentication ### User Servers (Added from UI) Users can add their own MCP servers directly from the UI: 1. Open the chat input and click the **+** button (or go to Settings) 2. Select **MCP Servers** 3. Click **Add Server** 4. Enter the server name and URL 5. Run **Health Check** to verify connectivity User-added servers are stored in the browser and can be removed at any time. They work alongside base servers. ## User Token Forwarding When users are logged in via Hugging Face, you can forward their access token to MCP servers: ```ini MCP_FORWARD_HF_USER_TOKEN=true ``` This allows MCP servers to access user-specific resources on their behalf. ## Using Tools 1. Enable the servers you want to use from the MCP Servers panel 2. 
Start chatting - models will automatically use tools when appropriate ### Model Requirements Not all models support tool calling. To enable tools for a specific model, add it to your `MODELS` override: ```ini MODELS=`[ { "id": "meta-llama/Llama-3.3-70B-Instruct", "supportsTools": true } ]` ``` ## Tool Execution Flow When a model decides to use a tool: 1. The model generates a tool call with parameters 2. Chat UI executes the call against the MCP server 3. Results are displayed in the chat as a collapsible "tool" block 4. Results are fed back to the model for follow-up responses ## Integration with LLM Router When using the [LLM Router](./llm-router), you can configure automatic routing to a tools-capable model: ```ini LLM_ROUTER_ENABLE_TOOLS=true LLM_ROUTER_TOOLS_MODEL=meta-llama/Llama-3.3-70B-Instruct ``` When a user has MCP servers enabled and selects the Omni model, the router will automatically use the specified tools model. ================================================ FILE: docs/source/configuration/metrics.md ================================================ # Metrics The server can expose Prometheus metrics on port `5565`, but this is off by default. You may enable the metrics server with `METRICS_ENABLED=true` and change the port with `METRICS_PORT=1234`. In development with `npm run dev`, the metrics server does not shut down gracefully because SvelteKit does not provide hooks for restart. It's recommended to disable the metrics server in this case. ================================================ FILE: docs/source/configuration/open-id.md ================================================ # OpenID By default, users are assigned a unique ID based on their browser session.
To authenticate users with OpenID Connect, configure the following: ```ini OPENID_CLIENT_ID=your_client_id OPENID_CLIENT_SECRET=your_client_secret OPENID_SCOPES="openid profile" ``` Use the provider URL for standard OpenID Connect discovery: ```ini OPENID_PROVIDER_URL=https://your-provider.com ``` Advanced: you can also provide a client metadata document via `OPENID_CONFIG`. This value must be a JSON/JSON5 object (for example, a CIMD document) and is parsed server‑side to populate OpenID settings. **Redirect URI:** `https://your-domain.com/login/callback` ## Access Control Restrict access to specific users: ```ini # Allow only specific email addresses ALLOWED_USER_EMAILS=["user@example.com", "admin@example.com"] # Allow all users from specific domains ALLOWED_USER_DOMAINS=["example.com", "company.org"] ``` ## Hugging Face Login For Hugging Face authentication, you can use automatic client registration: ```ini OPENID_CLIENT_ID=__CIMD__ ``` This creates an OAuth app automatically when deployed. See the [CIMD spec](https://datatracker.ietf.org/doc/draft-ietf-oauth-client-id-metadata-document/) for details. ## User Token Forwarding When users log in via Hugging Face, you can forward their token for inference: ```ini USE_USER_TOKEN=true ``` ## Auto-Login Force authentication on all routes: ```ini AUTOMATIC_LOGIN=true ``` ================================================ FILE: docs/source/configuration/overview.md ================================================ # Configuration Overview Chat UI is configured through environment variables. Default values are in `.env`; override them in `.env.local` or via your environment. ## Required Configuration Chat UI connects to any OpenAI-compatible API endpoint: ```ini OPENAI_BASE_URL=https://router.huggingface.co/v1 OPENAI_API_KEY=hf_************************ ``` Models are automatically discovered from `${OPENAI_BASE_URL}/models`. No manual model configuration is required. 
## Database ```ini MONGODB_URL=mongodb://localhost:27017 MONGODB_DB_NAME=chat-ui ``` For development, `MONGODB_URL` is optional - Chat UI falls back to an embedded MongoDB that persists to `./db`. ## Model Overrides To customize model behavior, use the `MODELS` environment variable (JSON5 format): ```ini MODELS=`[ { "id": "meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B", "multimodal": false, "supportsTools": true } ]` ``` Override properties: - `id` - Model identifier (must match an ID from the `/models` endpoint) - `name` - Display name in the UI - `multimodal` - Enable image uploads - `supportsTools` - Enable MCP tool calling for models that don’t advertise tool support - `parameters` - Override default parameters (temperature, max_tokens, etc.) ## Task Model Set a specific model for internal tasks (title generation, etc.): ```ini TASK_MODEL=meta-llama/Llama-3.1-8B-Instruct ``` If not set, the current conversation model is used. ## Voice Transcription Enable voice input with Whisper: ```ini TRANSCRIPTION_MODEL=openai/whisper-large-v3-turbo TRANSCRIPTION_BASE_URL=https://router.huggingface.co/hf-inference/models ``` ## Feature Flags ```ini LLM_SUMMARIZATION=true # Enable automatic conversation title generation ENABLE_DATA_EXPORT=true # Allow users to export their data ALLOW_IFRAME=false # Disallow embedding in iframes (set to true to allow) ``` ## User Authentication Use OpenID Connect for authentication: ```ini OPENID_CLIENT_ID=your_client_id OPENID_CLIENT_SECRET=your_client_secret OPENID_SCOPES="openid profile" ``` See [OpenID configuration](./open-id) for details. ## Environment Variable Reference See the [`.env` file](https://github.com/huggingface/chat-ui/blob/main/.env) for the complete list of available options. 
================================================ FILE: docs/source/configuration/theming.md ================================================ # Theming Customize the look and feel of Chat UI with these environment variables: ```ini PUBLIC_APP_NAME=ChatUI PUBLIC_APP_ASSETS=chatui PUBLIC_APP_DESCRIPTION="Making the community's best AI chat models available to everyone." ``` - `PUBLIC_APP_NAME` - The name used as a title throughout the app - `PUBLIC_APP_ASSETS` - Directory for logos & favicons in `static/$PUBLIC_APP_ASSETS`. Options: `chatui`, `huggingchat` - `PUBLIC_APP_DESCRIPTION` - Description shown in meta tags and about sections ## Additional Options ```ini PUBLIC_APP_DATA_SHARING=1 # Show data sharing opt-in toggle in settings PUBLIC_ORIGIN=https://chat.example.com # Your public URL (required for sharing) ``` ================================================ FILE: docs/source/developing/architecture.md ================================================ # Architecture This document provides a high-level overview of the Chat UI codebase. If you're looking to contribute or understand how the codebase works, this is the place for you! ## Overview Chat UI provides a simple interface connecting LLMs to external tools via MCP. The project uses [MongoDB](https://www.mongodb.com/) and [SvelteKit](https://kit.svelte.dev/) with [Tailwind](https://tailwindcss.com/). Key architectural decisions: - **OpenAI-compatible only**: All model interactions use the OpenAI API format - **MCP for tools**: Tool calling is handled via Model Context Protocol servers - **Auto-discovery**: Models are discovered from the `/models` endpoint ## Code Map ### `routes` All routes are rendered with SSR via SvelteKit. The majority of backend and frontend logic lives here, with shared modules in `lib` (client) and `lib/server` (server). ### `textGeneration` Provides a standard interface for chat features including model output, tool calls, and streaming.
Outputs `MessageUpdate`s for fine-grained status updates (new tokens, tool results, etc.). ### `endpoints` Provides the streaming interface for OpenAI-compatible endpoints. Models are fetched and cached from `${OPENAI_BASE_URL}/models`. ### `mcp` Implements MCP client functionality for tool discovery and execution. See [MCP Tools](../configuration/mcp-tools) for configuration. ### `llmRouter` Intelligent routing logic that selects the best model for each request. Uses the Arch router model for classification. See [LLM Router](../configuration/llm-router) for details. ### `migrations` MongoDB migrations for maintaining backwards compatibility across schema changes. Any schema change must include a migration. ## Development ```bash npm install npm run dev ``` The dev server runs at `http://localhost:5173` with hot reloading. ================================================ FILE: docs/source/index.md ================================================ # Chat UI Open source chat interface with support for tools, multimodal inputs, and intelligent routing across models. The app uses MongoDB and SvelteKit behind the scenes. Try the live version called [HuggingChat on hf.co/chat](https://huggingface.co/chat) or [set up your own instance](./installation/local).
Chat UI connects to any OpenAI-compatible API endpoint, making it work with: - [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) - [Ollama](https://ollama.ai) - [llama.cpp](https://github.com/ggerganov/llama.cpp) - [OpenRouter](https://openrouter.ai) - Any other OpenAI-compatible service **[MCP Tools](./configuration/mcp-tools)**: Function calling via Model Context Protocol (MCP) servers **[LLM Router](./configuration/llm-router)**: Intelligent routing to select the best model for each request **[Multimodal](./configuration/overview)**: Image uploads on models that support vision **[OpenID](./configuration/open-id)**: Optional user authentication via OpenID Connect ## Quickstart **Step 1 - Create `.env.local`:** ```ini OPENAI_BASE_URL=https://router.huggingface.co/v1 OPENAI_API_KEY=hf_************************ ``` You can use any OpenAI-compatible endpoint: | Provider | `OPENAI_BASE_URL` | `OPENAI_API_KEY` | | ------------ | ---------------------------------- | ---------------- | | Hugging Face | `https://router.huggingface.co/v1` | `hf_xxx` | | Ollama | `http://127.0.0.1:11434/v1` | `ollama` | | llama.cpp | `http://127.0.0.1:8080/v1` | `sk-local` | | OpenRouter | `https://openrouter.ai/api/v1` | `sk-or-v1-xxx` | **Step 2 - Install and run:** ```bash git clone https://github.com/huggingface/chat-ui cd chat-ui npm install npm run dev -- --open ``` That's it! Chat UI will automatically discover available models from your endpoint. > [!TIP] > MongoDB is optional for development. When `MONGODB_URL` is not set, Chat UI uses an embedded database that persists to `./db`. For production deployments, see the [installation guides](./installation/local). 
================================================ FILE: docs/source/installation/docker.md ================================================ # Running on Docker Pre-built Docker images are available: - **`ghcr.io/huggingface/chat-ui-db`** - Includes MongoDB (recommended for quick setup) - **`ghcr.io/huggingface/chat-ui`** - Requires external MongoDB ## Quick Start (with bundled MongoDB) ```bash docker run -p 3000:3000 \ -e OPENAI_BASE_URL=https://router.huggingface.co/v1 \ -e OPENAI_API_KEY=hf_*** \ -v chat-ui-data:/data \ ghcr.io/huggingface/chat-ui-db ``` ## With External MongoDB If you have an existing MongoDB instance: ```bash docker run -p 3000:3000 \ -e OPENAI_BASE_URL=https://router.huggingface.co/v1 \ -e OPENAI_API_KEY=hf_*** \ -e MONGODB_URL=mongodb://host.docker.internal:27017 \ ghcr.io/huggingface/chat-ui ``` Use `host.docker.internal` to reach MongoDB running on your host machine, or provide your MongoDB Atlas connection string. ## Using an Environment File For more configuration options, use `--env-file` to avoid leaking secrets in shell history: ```bash docker run -p 3000:3000 \ --env-file .env.local \ -v chat-ui-data:/data \ ghcr.io/huggingface/chat-ui-db ``` See the [configuration overview](../configuration/overview) for all available environment variables. ================================================ FILE: docs/source/installation/helm.md ================================================ # Helm The Helm chart is a work in progress and should be considered unstable. Breaking changes may be pushed without migration guides. Contributions welcome! For Kubernetes deployment, use the Helm chart in `/chart`. No chart repository is published, so clone the repository and install by path. 
## Installation ```bash git clone https://github.com/huggingface/chat-ui cd chat-ui helm install chat-ui ./chart -f values.yaml ``` ## Example values.yaml ```yaml replicas: 1 domain: example.com service: type: ClusterIP resources: requests: cpu: 100m memory: 2Gi limits: cpu: "4" memory: 6Gi envVars: OPENAI_BASE_URL: https://router.huggingface.co/v1 OPENAI_API_KEY: hf_*** MONGODB_URL: mongodb://chat-ui-mongo:27017 ``` See the [configuration overview](../configuration/overview) for all available environment variables. ================================================ FILE: docs/source/installation/local.md ================================================ # Running Locally ## Quick Start 1. Create a `.env.local` file with your API credentials: ```ini OPENAI_BASE_URL=https://router.huggingface.co/v1 OPENAI_API_KEY=hf_************************ ``` 2. Install and run: ```bash npm install npm run dev -- --open ``` That's it! Chat UI will discover available models automatically from your endpoint. ## Configuration Chat UI connects to any OpenAI-compatible API. Set `OPENAI_BASE_URL` to your provider: | Provider | `OPENAI_BASE_URL` | | ------------ | ---------------------------------- | | Hugging Face | `https://router.huggingface.co/v1` | | Ollama | `http://127.0.0.1:11434/v1` | | llama.cpp | `http://127.0.0.1:8080/v1` | | OpenRouter | `https://openrouter.ai/api/v1` | See the [configuration overview](../configuration/overview) for all available options. ## Database For **development**, MongoDB is optional. When `MONGODB_URL` is not set, Chat UI uses an embedded MongoDB server that persists data to the `./db` folder. For **production**, you should use a dedicated MongoDB instance: ### Option 1: Local MongoDB (Docker) ```bash docker run -d -p 27017:27017 -v mongo-chat-ui:/data --name mongo-chat-ui mongo:latest ``` Then set `MONGODB_URL=mongodb://localhost:27017` in `.env.local`. 
### Option 2: MongoDB Atlas (Managed) Use [MongoDB Atlas free tier](https://www.mongodb.com/pricing) for a managed database. Copy the connection string to `MONGODB_URL`. ## Running in Production For production deployments: ```bash npm install npm run build npm run preview ``` The server listens on `http://localhost:4173` by default. ================================================ FILE: entrypoint.sh ================================================ ENV_LOCAL_PATH=/app/.env.local if test -z "${DOTENV_LOCAL}" ; then if ! test -f "${ENV_LOCAL_PATH}" ; then echo "DOTENV_LOCAL was not found in the ENV variables and .env.local is not set using a bind volume. Make sure to set environment variables properly. " fi; else echo "DOTENV_LOCAL was found in the ENV variables. Creating .env.local file." cat <<< "$DOTENV_LOCAL" > ${ENV_LOCAL_PATH} fi; if [ "$INCLUDE_DB" = "true" ] ; then echo "Starting local MongoDB instance" nohup mongod & fi; export PUBLIC_VERSION=$(node -p "require('./package.json').version") dotenv -e /app/.env -c -- node --dns-result-order=ipv4first /app/build/index.js -- --host 0.0.0.0 --port 3000 ================================================ FILE: models/add-your-models-here.txt ================================================ You can add .gguf files to this folder, and they will be picked up automatically by chat-ui. ================================================ FILE: package.json ================================================ { "name": "chat-ui", "version": "0.20.0", "private": true, "packageManager": "npm@9.5.0", "scripts": { "dev": "vite dev", "build": "vite build", "build:static": "ADAPTER=static vite build", "preview": "vite preview", "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", "lint": "prettier --check . 
&& eslint .", "format": "prettier --write .", "test": "vitest", "updateLocalEnv": "vite-node --options.transformMode.ssr='/.*/' scripts/updateLocalEnv.ts", "populate": "vite-node --options.transformMode.ssr='/.*/' scripts/populate.ts", "config": "vite-node --options.transformMode.ssr='/.*/' scripts/config.ts", "prepare": "husky" }, "devDependencies": { "@faker-js/faker": "^8.4.1", "@iconify-json/carbon": "^1.1.16", "@iconify-json/eos-icons": "^1.1.6", "@iconify-json/lucide": "^1.2.77", "@sveltejs/adapter-node": "^5.2.12", "@sveltejs/adapter-static": "^3.0.8", "@sveltejs/kit": "^2.52.2", "@sveltejs/vite-plugin-svelte": "^5.0.3", "@tailwindcss/typography": "^0.5.9", "@types/dompurify": "^3.0.5", "@types/js-yaml": "^4.0.9", "@types/katex": "^0.16.7", "@types/mime-types": "^2.1.4", "@types/minimist": "^1.2.5", "@types/node": "^22.1.0", "@types/parquetjs": "^0.10.3", "@types/uuid": "^9.0.8", "@types/yazl": "^3.3.0", "@typescript-eslint/eslint-plugin": "^6.x", "@typescript-eslint/parser": "^6.x", "bson-objectid": "^2.0.4", "dompurify": "^3.2.4", "eslint": "^8.28.0", "eslint-config-prettier": "^8.5.0", "eslint-plugin-svelte": "^2.45.1", "husky": "^9.0.11", "isomorphic-dompurify": "2.13.0", "js-yaml": "^4.1.1", "lint-staged": "^15.2.7", "minimist": "^1.2.8", "mongodb-memory-server": "^10.1.2", "playwright": "^1.55.1", "prettier": "^3.5.3", "prettier-plugin-svelte": "^3.2.6", "prettier-plugin-tailwindcss": "^0.6.11", "sade": "^1.8.1", "superjson": "^2.2.2", "svelte": "^5.53.7", "svelte-check": "^4.0.0", "tslib": "^2.4.1", "typescript": "^5.5.0", "unplugin-icons": "^0.16.1", "vite": "^6.3.5", "vite-node": "^3.0.9", "vitest": "^3.1.4", "vitest-browser-svelte": "^0.1.0", "yazl": "^3.3.1" }, "type": "module", "dependencies": { "@huggingface/hub": "^2.2.0", "@huggingface/inference": "^4.11.3", "@iconify-json/bi": "^1.1.21", "@modelcontextprotocol/sdk": "^1.26.0", "@resvg/resvg-js": "^2.6.2", "ajv": "^8.18.0", "autoprefixer": "^10.4.14", "bits-ui": "^2.14.2", "date-fns": 
"^2.29.3", "devalue": "^5.6.4", "dotenv": "^16.5.0", "file-type": "^21.3.1", "handlebars": "^4.7.8", "highlight.js": "^11.7.0", "htmlparser2": "^10.0.0", "ip-address": "^9.0.5", "jsdom": "^28.1.0", "json5": "^2.2.3", "katex": "^0.16.21", "marked": "^12.0.1", "mime-types": "^2.1.35", "mongodb": "^5.8.0", "nanoid": "^5.0.9", "openai": "^4.44.0", "openid-client": "^5.4.2", "parquetjs": "^0.11.2", "pino": "^9.0.0", "pino-pretty": "^11.0.0", "postcss": "^8.4.31", "prom-client": "^15.1.3", "qs": "^6.14.2", "satori": "^0.10.11", "satori-html": "^0.3.2", "sharp": "^0.33.4", "tailwind-scrollbar": "^3.0.0", "tailwindcss": "^3.4.0", "undici": "^7.18.2", "uuid": "^10.0.0", "web-haptics": "^0.0.6", "zod": "^3.22.3" }, "overrides": { "@reflink/reflink": "file:stub/@reflink/reflink" } } ================================================ FILE: postcss.config.js ================================================ export default { plugins: { tailwindcss: {}, autoprefixer: {}, }, }; ================================================ FILE: scripts/config.ts ================================================ import sade from "sade"; // @ts-expect-error: vite-node makes the var available but the typescript compiler doesn't see them import { config, ready } from "$lib/server/config"; const prog = sade("config"); await ready; prog .command("clear") .describe("Clear all config keys") .action(async () => { console.log("Clearing config..."); await clear(); }); prog .command("add ") .describe("Add a new config key") .action(async (key: string, value: string) => { await add(key, value); }); prog .command("remove ") .describe("Remove a config key") .action(async (key: string) => { console.log(`Removing ${key}`); await remove(key); process.exit(0); }); prog .command("help") .describe("Show help information") .action(() => { prog.help(); process.exit(0); }); async function clear() { await config.clear(); process.exit(0); } async function add(key: string, value: string) { if (!key || !value) { 
console.error("Key and value are required"); process.exit(1); } await config.set(key as keyof typeof config.keysFromEnv, value); process.exit(0); } async function remove(key: string) { if (!key) { console.error("Key is required"); process.exit(1); } await config.delete(key as keyof typeof config.keysFromEnv); process.exit(0); } // Parse arguments and handle help automatically prog.parse(process.argv); ================================================ FILE: scripts/populate.ts ================================================ import readline from "readline"; import minimist from "minimist"; // @ts-expect-error: vite-node makes the var available but the typescript compiler doesn't see them import { env } from "$env/dynamic/private"; import { faker } from "@faker-js/faker"; import { ObjectId } from "mongodb"; // @ts-expect-error: vite-node makes the var available but the typescript compiler doesn't see them import { ready } from "$lib/server/config"; import { collections } from "$lib/server/database.ts"; import { models } from "../src/lib/server/models.ts"; import type { User } from "../src/lib/types/User"; import type { Assistant } from "../src/lib/types/Assistant"; import type { Conversation } from "../src/lib/types/Conversation"; import type { Settings } from "../src/lib/types/Settings"; import { Message } from "../src/lib/types/Message.ts"; import { addChildren } from "../src/lib/utils/tree/addChildren.ts"; import { generateSearchTokens } from "../src/lib/utils/searchTokens.ts"; import { ReviewStatus } from "../src/lib/types/Review.ts"; import fs from "fs"; import path from "path"; const rl = readline.createInterface({ input: process.stdin, output: process.stdout, }); await ready; rl.on("close", function () { process.exit(0); }); const samples = fs.readFileSync(path.join(__dirname, "samples.txt"), "utf8").split("\n---\n"); const possibleFlags = ["reset", "all", "users", "settings", "assistants", "conversations"]; const argv = minimist(process.argv.slice(2)); const 
/**
 * Generates a fake message tree for one conversation.
 *
 * The tree always starts with a single "system" message holding `preprompt`
 * (or an empty string). Half of the time the conversation is linear (each
 * message is the child of the previous one); otherwise every new message is
 * attached to the root or to a random earlier message of the opposite role,
 * which produces a branching tree.
 *
 * @param preprompt optional system prompt stored in the root message
 * @returns the flat list of messages that `addChildren` filled in
 */
async function generateMessages(preprompt?: string): Promise<Message[]> {
	const isLinear = faker.datatype.boolean(0.5);
	const isInterrupted = faker.datatype.boolean(0.05);

	const messages: Message[] = [];

	messages.push({
		id: crypto.randomUUID(),
		from: "system",
		content: preprompt ?? "",
		createdAt: faker.date.recent({ days: 30 }),
		updatedAt: faker.date.recent({ days: 30 }),
	});

	const rootMessageId = messages[0].id;

	// Random payload shared by both tree shapes. Roughly 10% of assistant
	// messages get one of the code samples appended inside a fenced block, and
	// only the very last assistant message can be flagged as interrupted.
	const fakeMessage = (isUser: boolean, isLast: boolean) => ({
		from: isUser ? ("user" as const) : ("assistant" as const),
		content:
			faker.lorem.sentence({
				min: 10,
				max: isUser ? 50 : 200,
			}) +
			(!isUser && Math.random() < 0.1
				? "\n```\n" + faker.helpers.arrayElement(samples) + "\n```\n"
				: ""),
		createdAt: faker.date.recent({ days: 30 }),
		updatedAt: faker.date.recent({ days: 30 }),
		interrupted: !isUser && isLast && isInterrupted,
	});

	let isUser = true;

	if (isLinear) {
		let lastId = rootMessageId;
		const convLength = faker.number.int({ min: 1, max: 25 }) * 2; // must always be even

		for (let i = 0; i < convLength; i++) {
			lastId = addChildren(
				{ messages, rootMessageId },
				fakeMessage(isUser, i === convLength - 1),
				lastId
			);
			isUser = !isUser;
		}
	} else {
		const convLength = faker.number.int({ min: 2, max: 200 });

		for (let i = 0; i < convLength; i++) {
			addChildren(
				{ messages, rootMessageId },
				fakeMessage(isUser, i === convLength - 1),
				// Parent: the root, or any existing message written by the other role.
				faker.helpers.arrayElement([
					rootMessageId,
					...messages.filter((m) => m.from === (isUser ? "assistant" : "user")).map((m) => m.id),
				])
			);
			isUser = !isUser;
		}
	}

	return messages;
}

/**
 * Populates the database with fake data according to the CLI `flags`.
 *
 * Stages run in order and each one is fully awaited before the next starts:
 * optional reset, users, settings, assistants, conversations. Later stages
 * read what earlier stages wrote (e.g. conversations read the user's
 * settings), so the ordering matters.
 */
async function seed() {
	console.log("Seeding...");
	const modelIds = models.map((model) => model.id);

	if (flags.includes("reset")) {
		console.log("Starting reset of DB");
		await collections.users.deleteMany({});
		await collections.settings.deleteMany({});
		await collections.assistants.deleteMany({});
		await collections.conversations.deleteMany({});
		await collections.migrationResults.deleteMany({});
		await collections.semaphores.deleteMany({});
		console.log("Reset done");
	}

	if (flags.includes("users") || flags.includes("all")) {
		console.log("Creating 100 new users");
		const newUsers: User[] = Array.from({ length: 100 }, () => ({
			_id: new ObjectId(),
			createdAt: faker.date.recent({ days: 30 }),
			updatedAt: faker.date.recent({ days: 30 }),
			username: faker.internet.userName(),
			name: faker.person.fullName(),
			hfUserId: faker.string.alphanumeric(24),
			avatarUrl: faker.image.avatar(),
		}));

		await collections.users.insertMany(newUsers);
		console.log("Done creating users.");
	}

	const users = await collections.users.find().toArray();

	if (flags.includes("settings") || flags.includes("all")) {
		console.log("Updating settings for all users");
		// `forEach` ignores the promises returned by an async callback, which let
		// the script log "Done" (and possibly exit) before the upserts finished.
		// Map to promises and await them all instead.
		await Promise.all(
			users.map(async (user) => {
				const settings: Settings = {
					userId: user._id,
					shareConversationsWithModelAuthors: faker.datatype.boolean(0.25),
					hideEmojiOnSidebar: faker.datatype.boolean(0.25),
					activeModel: faker.helpers.arrayElement(modelIds),
					createdAt: faker.date.recent({ days: 30 }),
					updatedAt: faker.date.recent({ days: 30 }),
					disableStream: faker.datatype.boolean(0.25),
					directPaste: faker.datatype.boolean(0.25),
					hidePromptExamples: {},
					customPrompts: {},
					assistants: [],
				};
				await collections.settings.updateOne(
					{ userId: user._id },
					{ $set: { ...settings } },
					{ upsert: true }
				);
			})
		);
		console.log("Done updating settings.");
	}

	if (flags.includes("assistants") || flags.includes("all")) {
		console.log("Creating assistants for all users");
		await Promise.all(
			users.map(async (user) => {
				// One name per user: all assistants generated for this user share it.
				const name = faker.animal.insect();
				const assistants = faker.helpers.multiple(
					() => ({
						_id: new ObjectId(),
						name,
						createdById: user._id,
						createdByName: user.username,
						createdAt: faker.date.recent({ days: 30 }),
						updatedAt: faker.date.recent({ days: 30 }),
						userCount: faker.number.int({ min: 1, max: 100000 }),
						review: faker.helpers.enumValue(ReviewStatus),
						modelId: faker.helpers.arrayElement(modelIds),
						description: faker.lorem.sentence(),
						preprompt: faker.hacker.phrase(),
						exampleInputs: faker.helpers.multiple(() => faker.lorem.sentence(), {
							count: faker.number.int({ min: 0, max: 4 }),
						}),
						searchTokens: generateSearchTokens(name),
						last24HoursCount: faker.number.int({ min: 0, max: 1000 }),
					}),
					{ count: faker.number.int({ min: 3, max: 10 }) }
				);
				await collections.assistants.insertMany(assistants);
				await collections.settings.updateOne(
					{ userId: user._id },
					{ $set: { assistants: assistants.map((a) => a._id.toString()) } },
					{ upsert: true }
				);
			})
		);
		console.log("Done creating assistants.");
	}

	if (flags.includes("conversations") || flags.includes("all")) {
		console.log("Creating conversations for all users");
		await Promise.all(
			users.map(async (user) => {
				// The user's settings do not change during this stage, so fetch them
				// once per user instead of once per generated conversation.
				const settings = await collections.settings.findOne({ userId: user._id });

				const conversations = faker.helpers.multiple(
					async () => {
						// ~10% of conversations are tied to one of the user's assistants.
						const assistantId =
							settings?.assistants && settings.assistants.length > 0 && faker.datatype.boolean(0.1)
								? faker.helpers.arrayElement(settings.assistants)
								: undefined;

						const preprompt =
							(assistantId
								? await collections.assistants
										.findOne({ _id: assistantId })
										.then((assistant: Assistant) => assistant?.preprompt ?? "")
								: faker.helpers.maybe(() => faker.hacker.phrase(), { probability: 0.5 })) ?? "";

						const messages = await generateMessages(preprompt);

						const conv = {
							_id: new ObjectId(),
							userId: user._id,
							assistantId,
							preprompt,
							createdAt: faker.date.recent({ days: 145 }),
							updatedAt: faker.date.recent({ days: 145 }),
							model: faker.helpers.arrayElement(modelIds),
							title: faker.internet.emoji() + " " + faker.hacker.phrase(),
							// embeddings removed in this build
							messages,
							rootMessageId: messages[0].id,
						} satisfies Conversation;

						return conv;
					},
					{ count: faker.number.int({ min: 10, max: 200 }) }
				);

				await collections.conversations.insertMany(await Promise.all(conversations));
			})
		);
		console.log("Done creating conversations.");
	}
}

// run seed
(async () => {
	try {
		rl.question(
			"You're about to run a seeding script on the following MONGODB_URL: \x1b[31m" +
				env.MONGODB_URL +
				"\x1b[0m\n\n With the following flags: \x1b[31m" +
				flags.join("\x1b[0m , \x1b[31m") +
				"\x1b[0m\n \n\n Are you sure you want to continue? (yes/no): ",
			async (confirm) => {
				if (confirm !== "yes") {
					console.log("Not 'yes', exiting.");
					rl.close();
					process.exit(0);
				}
				// The surrounding try/catch only sees synchronous errors thrown by
				// `rl.question` itself; failures inside this async callback must be
				// handled here or they surface as unhandled rejections.
				try {
					console.log("Starting seeding...");
					await seed();
					console.log("Seeding done.");
					rl.close();
				} catch (e) {
					console.error(e);
					process.exit(1);
				}
			}
		);
	} catch (e) {
		console.error(e);
		process.exit(1);
	}
})();
// Example usage: Tracking stock price updates const stockTicker = 'AAPL'; const stockPrice$ = fetchStockPrice(stockTicker).pipe( map(price => ({ ticker: stockTicker, price })), // Transform data filter(data => data.price > 500), // Only keep prices above 500 tap(data => console.log(`Price update:`, data)), // Side effect: Logging catchError(err => { console.error(err); return of({ ticker: stockTicker, price: null }); // Fallback observable }) ); // Subscribe to the stock price updates const subscription = stockPrice$.subscribe({ next: data => console.log(`Subscriber received:`, data), error: err => console.error(`Subscription error:`, err), complete: () => console.log('Stream complete'), }); // Automatically unsubscribe after 10 seconds setTimeout(() => { subscription.unsubscribe(); console.log('Unsubscribed from stock price updates.'); }, 10000); --- class EnforceAttrsMeta(type): """ Metaclass that enforces the presence of specific attributes in a class and automatically decorates methods with a logging wrapper. """ required_attributes = ['name', 'version'] def __new__(cls, name, bases, class_dict): """ Create a new class with enforced attributes and method logging. :param name: Name of the class being created. :param bases: Tuple of base classes. :param class_dict: Dictionary of attributes and methods of the class. :return: Newly created class object. """ # Ensure required attributes exist for attr in cls.required_attributes: if attr not in class_dict: raise TypeError(f"Class '{name}' is missing required attribute '{attr}'") # Wrap all methods in a logging decorator for key, value in class_dict.items(): if callable(value): # Check if it's a method class_dict[key] = cls.log_calls(value) return super().__new__(cls, name, bases, class_dict) @staticmethod def log_calls(func): """ Decorator that logs method calls and arguments. :param func: Function to be wrapped. :return: Wrapped function with logging. 
""" def wrapper(*args, **kwargs): print(f"Calling {func.__name__} with args={args} kwargs={kwargs}") result = func(*args, **kwargs) print(f"{func.__name__} returned {result}") return result return wrapper class PluginBase(metaclass=EnforceAttrsMeta): """ Base class for plugins that enforces required attributes and logging. """ name = "BasePlugin" version = "1.0" def run(self, data): """ Process the input data. :param data: The data to be processed. :return: Processed result. """ return f"Processed {data}" class CustomPlugin(PluginBase): """ Custom plugin that extends PluginBase and adheres to enforced rules. """ name = "CustomPlugin" version = "2.0" def run(self, data): """ Custom processing logic. :param data: The data to process. :return: Modified data. """ return f"Custom processing of {data}" # Uncommenting the following class definition will raise a TypeError # because 'version' attribute is missing. # class InvalidPlugin(PluginBase): # name = "InvalidPlugin" if __name__ == "__main__": # Instantiate and use the plugin plugin = CustomPlugin() print(plugin.run("example data")) --- Click the Box Game

Click the Box!

Score: 0

// Handle to the in-memory MongoDB instance; assigned lazily by the
// "$env/dynamic/private" mock factory below.
let mongoServer: MongoMemoryServer;

// Location of the repository-level .env file, relative to this setup file.
const envPath = resolve(__dirname, "../../.env");

// Load the file into process.env ...
dotenv.config({ path: envPath });

// ... and also parse its raw text so the variables can be split by prefix.
const envVars = dotenv.parse(fs.readFileSync(envPath, "utf-8"));

// Variables whose name starts with "PUBLIC_" go to the public bucket,
// everything else to the private one.
const publicEnv = {};
const privateEnv = {};

Object.entries(envVars).forEach(([name, val]) => {
	const bucket = name.startsWith("PUBLIC_") ? publicEnv : privateEnv;
	bucket[name] = val;
});

vi.mock("$env/dynamic/public", () => ({
	env: publicEnv,
}));

// The private env mock starts the in-memory server and points MONGODB_URL at it.
vi.mock("$env/dynamic/private", async () => {
	mongoServer = await MongoMemoryServer.create();
	const env = {
		...privateEnv,
		MONGODB_URL: mongoServer.getUri(),
	};
	return { env };
});

// Stop the in-memory server once the tests are done (if it was ever started).
afterAll(async () => {
	if (!mongoServer) return;
	await mongoServer.stop();
});
// Keys that only make sense in prod behind the proxy; they are left out of
// the generated local env file.
const PROXY_ONLY_KEYS = [
	"XFF_DEPTH",
	"ADDRESS_HEADER",
	"APP_BASE",
	"PUBLIC_ORIGIN",
	"PUBLIC_SHARE_PREFIX",
	"ADMIN_CLI_LOGIN",
];

// Literal substitutions applied, in this order, to the assembled config text.
const LOCAL_OVERRIDES: [string, string][] = [
	// replace the internal proxy url with the public endpoint
	[
		"https://internal.api-inference.huggingface.co",
		"https://router.huggingface.co/hf-inference",
	],
	["COOKIE_SECURE=`true`", "COOKIE_SECURE=`false`"],
	["LOG_LEVEL=`debug`", "LOG_LEVEL=`info`"],
	["NODE_ENV=`prod`", "NODE_ENV=`development`"],
];

const file = fs.readFileSync("chart/env/prod.yaml", "utf8");

// have to do a weird stringify/parse because of some node error
const prod = JSON.parse(JSON.stringify(yaml.load(file)));
const vars = prod.envVars as Record<string, string>;

// One `KEY=\`value\`` line per remaining prod variable.
const PUBLIC_CONFIG = Object.entries(vars)
	.filter(([key]) => !PROXY_ONLY_KEYS.includes(key))
	.map(([key, value]) => `${key}=\`${value}\`\n`)
	.join("");

// Secrets come from the .env.SECRET_CONFIG file when it exists, otherwise
// from the SECRET_CONFIG environment variable (empty string when unset).
let SECRET_CONFIG: string;
if (fs.existsSync(".env.SECRET_CONFIG")) {
	SECRET_CONFIG = fs.readFileSync(".env.SECRET_CONFIG", "utf8");
} else {
	SECRET_CONFIG = process.env.SECRET_CONFIG ?? "";
}

// Public variables first, then the secrets, then the local overrides.
const full_config = LOCAL_OVERRIDES.reduce(
	(text, [from, to]) => text.replaceAll(from, to),
	`${PUBLIC_CONFIG}\n${SECRET_CONFIG}`
);

// Write full_config to .env.local
fs.writeFileSync(".env.local", full_config);
// See https://kit.svelte.dev/docs/types#app
// for information about these interfaces
//
// Global augmentation of the `App` namespace: declares the fields this
// project puts on `App.Locals` and `App.Error`.
declare global {
	namespace App {
		// interface Error {}
		// Per-request values; `sessionId` and `isAdmin` are always present, the
		// remaining fields are optional.
		interface Locals {
			sessionId: string;
			// `User` is the type imported from "$lib/types/User".
			user?: User;
			isAdmin: boolean;
			token?: string;
			/** Organization to bill inference requests to (from settings) */
			billingOrganization?: string;
		}

		// Error shape: a required message plus an optional identifier.
		interface Error {
			message: string;
			// NOTE(review): `ReturnType` appears here without a type argument, which
			// looks like it was lost in extraction — confirm against the repository.
			errorId?: ReturnType;
		}

		// interface PageData {}
		// interface Platform {}
	}
}
%sveltekit.body%
/**
 * Server start-up hook. Does nothing while the app is being built; otherwise
 * delegates to `initServer`.
 */
export const init: ServerInit = async () => {
	return building ? undefined : initServer();
};

/**
 * Request hook. At runtime every request goes through `handleRequest`.
 * During a build the request is resolved directly, with the `%gaId%`
 * placeholder replaced by an empty string in each HTML chunk.
 */
export const handle: Handle = async (input) => {
	if (!building) {
		return handleRequest(input);
	}

	// During static build, still replace %gaId% placeholder with empty string
	// to prevent the GA script from loading with an invalid ID
	const stripGaPlaceholder = ({ html }: { html: string }) => html.replace("%gaId%", "");

	return input.resolve(input.event, {
		transformPageChunk: stripGaPlaceholder,
	});
};

/**
 * Error hook. While building, the original error is rethrown untouched;
 * otherwise it is passed to `handleServerError`.
 */
export const handleError: HandleServerError = async (input) => {
	if (!building) {
		return handleServerError(input);
	}
	throw input.error;
};

/**
 * Fetch hook. While building, the request is forwarded as-is through the
 * provided `fetch`; otherwise it is passed to `handleFetchRequest`.
 */
export const handleFetch: HandleFetch = async (input) => {
	return building ? input.fetch(input.request) : handleFetchRequest(input);
};
type FetchFn = typeof globalThis.fetch;

/** Uniform result of an API call: either `data` or `error` is set, plus the HTTP status. */
interface ApiResponse<T = unknown> {
	data: T | null;
	error: unknown;
	status: number;
}

/**
 * Extracts the most useful error payload from a failed response.
 *
 * The body is read exactly once as text and then parsed as JSON when
 * possible. A `Response` body can only be consumed one time, so trying
 * `res.json()` first and `res.text()` afterwards always loses non-JSON error
 * bodies. When there is no usable body, the status text (or `HTTP <status>`
 * if that is empty too) is returned so callers always get a non-empty error.
 */
async function readErrorBody(res: Response): Promise<unknown> {
	const fallback = res.statusText || `HTTP ${res.status}`;

	let raw: string;
	try {
		raw = await res.text();
	} catch {
		return fallback;
	}
	if (!raw) {
		return fallback;
	}

	try {
		// A literal `null` JSON body carries no information; use the fallback.
		return JSON.parse(raw) ?? fallback;
	} catch {
		return raw;
	}
}

/**
 * Performs one HTTP request and normalizes the outcome.
 *
 * @param fetcher fetch implementation to use
 * @param url absolute URL of the endpoint
 * @param method HTTP method
 * @param body optional payload; when not null/undefined it is sent as JSON
 * @param query optional query parameters; null/undefined values are skipped,
 *   everything else is stringified with `String()`
 * @returns `{ data, error, status }`. On a non-2xx response `data` is null and
 *   `error` holds the parsed JSON body, the raw text body, or a status-based
 *   fallback. On success `data` is the raw response text (null when the body
 *   is empty) and `error` is null.
 */
async function apiCall<T>(
	fetcher: FetchFn,
	url: string,
	method: string,
	body?: unknown,
	query?: Record<string, unknown>
): Promise<ApiResponse<T>> {
	const u = new URL(url);
	if (query) {
		for (const [k, v] of Object.entries(query)) {
			if (v !== undefined && v !== null) {
				u.searchParams.set(k, String(v));
			}
		}
	}

	const init: RequestInit = { method };
	if (body !== undefined && body !== null) {
		init.headers = { "Content-Type": "application/json" };
		init.body = JSON.stringify(body);
	}

	const res = await fetcher(u.toString(), init);

	if (!res.ok) {
		return { data: null, error: await readErrorBody(res), status: res.status };
	}

	// Handle empty responses (e.g. POST /user/settings returns empty body)
	const text = await res.text();
	if (!text) {
		return { data: null, error: null, status: res.status };
	}

	// The raw text is handed back untouched; decoding is left to the caller.
	return { data: text as unknown as T, error: null, status: res.status };
}

/**
 * Builds the get/post/patch/delete helpers bound to a single URL.
 *
 * @param fetcher fetch implementation to use for every call
 * @param baseUrl absolute URL all four helpers target
 */
function endpoint(fetcher: FetchFn, baseUrl: string) {
	return {
		get(opts?: { query?: Record<string, unknown> }) {
			return apiCall(fetcher, baseUrl, "GET", undefined, opts?.query);
		},
		post(body?: unknown) {
			return apiCall(fetcher, baseUrl, "POST", body);
		},
		patch(body?: unknown) {
			return apiCall(fetcher, baseUrl, "PATCH", body);
		},
		delete() {
			return apiCall(fetcher, baseUrl, "DELETE");
		},
	};
}
/**
 * Creates the typed-by-shape client for the `/api/v2` routes.
 *
 * In the browser the API root is derived from `window.location.origin`;
 * elsewhere it uses the supplied `origin`, defaulting to
 * `http://localhost:5173`. Every leaf of the returned object is an
 * `endpoint(...)` bound to one URL.
 *
 * @param fetch optional fetch implementation (defaults to `globalThis.fetch`)
 * @param origin optional origin used when not running in the browser
 */
export function useAPIClient({
	fetch: customFetch,
	origin,
}: {
	fetch?: FetchFn;
	origin?: string;
} = {}) {
	const fetcher = customFetch ?? globalThis.fetch;

	let baseUrl: string;
	if (browser) {
		baseUrl = `${window.location.origin}${base}/api/v2`;
	} else {
		baseUrl = `${origin ?? `http://localhost:5173`}${base}/api/v2`;
	}

	// Shorthand: an endpoint bound to this client's fetcher.
	const at = (url: string) => endpoint(fetcher, url);

	// client.conversations({ id: "..." }) — returns endpoint for /conversations/:id
	const conversationById = (params: { id: string }) => {
		const messageById = (msgParams: { messageId: string }) =>
			at(`${baseUrl}/conversations/${params.id}/message/${msgParams.messageId}`);

		return {
			...at(`${baseUrl}/conversations/${params.id}`),
			message: messageById,
		};
	};

	// client.conversations.get(), .delete()
	const conversationCollection = {
		...at(`${baseUrl}/conversations`),
		"import-share": at(`${baseUrl}/conversations/import-share`),
	};

	const user = {
		...at(`${baseUrl}/user`),
		settings: at(`${baseUrl}/user/settings`),
		reports: at(`${baseUrl}/user/reports`),
		"billing-orgs": at(`${baseUrl}/user/billing-orgs`),
	};

	const models = {
		...at(`${baseUrl}/models`),
		old: at(`${baseUrl}/models/old`),
		refresh: at(`${baseUrl}/models/refresh`),
	};

	const debug = {
		config: at(`${baseUrl}/debug/config`),
		refresh: at(`${baseUrl}/debug/refresh`),
	};

	return {
		conversations: Object.assign(conversationById, conversationCollection),
		user,
		models,
		"public-config": at(`${baseUrl}/public-config`),
		"feature-flags": at(`${baseUrl}/feature-flags`),
		debug,
		export: at(`${baseUrl}/export`),
	};
}
/**
 * Action that invokes a callback for every click on `document.body` whose
 * target is not contained in `element`.
 *
 * @param element node that counts as "inside"
 * @param callbackFunction called on each outside click
 * @returns `update` to swap the callback, `destroy` to remove the body listener
 */
export function clickOutside(element: HTMLElement, callbackFunction: () => void) {
	// Current callback; replaced by `update` without re-registering the listener.
	let onOutside = callbackFunction;

	const handleBodyClick = (event: MouseEvent): void => {
		const clickedInside = element.contains(event.target as Node);
		if (clickedInside) {
			return;
		}
		onOutside();
	};

	document.body.addEventListener("click", handleBodyClick);

	return {
		update(newCallbackFunction: () => void) {
			onOutside = newCallbackFunction;
		},
		destroy() {
			document.body.removeEventListener("click", handleBodyClick);
		},
	};
}
/**
 * Auto-scroll action that snaps to bottom while respecting user scroll intent.
 *
 * Key behaviors:
 * 1. Uses wheel/touch events to detect actual user intent
 * 2. Uses IntersectionObserver on a sentinel element to reliably detect "at bottom" state
 * 3. Larger threshold to prevent edge-case false detachments
 *
 * The action keeps a single "detached" flag: while it is false, content
 * changes scroll the node to the bottom; once the user scrolls up it becomes
 * true and automatic scrolling stops until the user returns to the bottom or
 * the `forceReattach` counter is incremented.
 *
 * @param node element to snap scroll to bottom
 * @param dependency pass in { signal, forceReattach } - signal triggers scroll updates,
 * forceReattach (counter) forces re-attachment when incremented
 * @returns an object with `update` (called with the new dependency) and
 * `destroy` (removes listeners, observers and the sentinel element)
 */
export const snapScrollToBottom = (node: HTMLElement, dependency: MaybeScrollDependency) => {
	// --- State ----------------------------------------------------------------

	// Track whether user has intentionally scrolled away from bottom
	let isDetached = false;

	// Track the last forceReattach value to detect changes
	let lastForceReattach = getForceReattach(dependency);

	// Track if user is actively scrolling (via wheel/touch)
	let userScrolling = false;
	// Pending debounce timer that ends the "user is scrolling" state
	let userScrollTimeout: ReturnType | undefined;

	// Track programmatic scrolls to avoid treating them as user scrolls
	let isProgrammaticScroll = false;
	let lastProgrammaticScrollTime = 0;

	// Track previous scroll position to detect scrollbar drags
	let prevScrollTop = node.scrollTop;

	// Touch handling state
	let touchStartY = 0;

	// Observers and sentinel
	let resizeObserver: ResizeObserver | undefined;
	let intersectionObserver: IntersectionObserver | undefined;
	let sentinel: HTMLDivElement | undefined;

	// Track content height for early-return optimization during streaming
	let lastScrollHeight = node.scrollHeight;

	// --- Helpers --------------------------------------------------------------

	// Cancels the pending end-of-user-scroll check, if any.
	const clearUserScrollTimeout = () => {
		if (userScrollTimeout) {
			clearTimeout(userScrollTimeout);
			userScrollTimeout = undefined;
		}
	};

	// Pixels between the bottom of the visible area and the end of the content.
	const distanceFromBottom = () => node.scrollHeight - node.scrollTop - node.clientHeight;

	// "At bottom" means within BOTTOM_THRESHOLD pixels of the end.
	const isAtBottom = () => distanceFromBottom() <= BOTTOM_THRESHOLD;

	// Scrolls to the end and marks the scroll as programmatic so handleScroll
	// does not mistake it for a user action. The flag is cleared on the next
	// animation frame when requestAnimationFrame exists, immediately otherwise.
	const scrollToBottom = () => {
		isProgrammaticScroll = true;
		lastProgrammaticScrollTime = Date.now();

		node.scrollTo({ top: node.scrollHeight });

		if (typeof requestAnimationFrame === "function") {
			requestAnimationFrame(() => {
				isProgrammaticScroll = false;
			});
		} else {
			isProgrammaticScroll = false;
		}
	};

	// Re-scrolls on each of the next two animation frames (unless the user is
	// scrolling or has detached) to follow layout shifts after an update.
	const settleScrollAfterLayout = async () => {
		if (typeof requestAnimationFrame !== "function") return;

		// Resolves on the next animation frame.
		const raf = () => new Promise((resolve) => requestAnimationFrame(() => resolve()));

		await raf();
		if (!userScrolling && !isDetached) {
			scrollToBottom();
		}

		await raf();
		if (!userScrolling && !isDetached) {
			scrollToBottom();
		}
	};

	// Marks the user as actively scrolling and (re)starts the debounce timer;
	// when it fires after USER_SCROLL_DEBOUNCE_MS the scroll is considered over.
	const scheduleUserScrollEndCheck = () => {
		userScrolling = true;
		clearUserScrollTimeout();

		userScrollTimeout = setTimeout(() => {
			userScrolling = false;

			// If user scrolled back to bottom, re-attach
			if (isAtBottom()) {
				isDetached = false;
			}

			// Re-trigger scroll if still attached, to catch content that arrived during scrolling
			if (!isDetached) {
				scrollToBottom();
			}
		}, USER_SCROLL_DEBOUNCE_MS);
	};

	// Creates the 1px-high, aria-hidden marker element observed by the
	// IntersectionObserver.
	const createSentinel = () => {
		sentinel = document.createElement("div");
		sentinel.style.height = "1px";
		sentinel.style.width = "100%";
		sentinel.setAttribute("aria-hidden", "true");
		sentinel.setAttribute("data-scroll-sentinel", "");

		// Find the content container (first child) and append sentinel there
		const container = node.firstElementChild;
		if (container) {
			container.appendChild(sentinel);
		} else {
			node.appendChild(sentinel);
		}
	};

	// Re-attaches whenever the sentinel becomes visible inside `node`
	// (with BOTTOM_THRESHOLD px of extra bottom margin). No-op when
	// IntersectionObserver is unavailable or the sentinel was not created.
	const setupIntersectionObserver = () => {
		if (typeof IntersectionObserver === "undefined" || !sentinel) return;

		intersectionObserver = new IntersectionObserver(
			(entries) => {
				const entry = entries[0];

				// If sentinel is visible and user isn't actively scrolling, we're at bottom
				if (entry?.isIntersecting && !userScrolling) {
					isDetached = false;
					// Immediately scroll to catch up with any content that arrived while detached
					scrollToBottom();
				}
			},
			{
				root: node,
				threshold: 0,
				rootMargin: `0px 0px ${BOTTOM_THRESHOLD}px 0px`,
			}
		);

		intersectionObserver.observe(sentinel);
	};

	// Follows size changes of the content (first child, or the node itself).
	// No-op when ResizeObserver is unavailable.
	const setupResizeObserver = () => {
		if (typeof ResizeObserver === "undefined") return;

		const target = node.firstElementChild ?? node;

		resizeObserver = new ResizeObserver(() => {
			// Don't auto-scroll if user has detached and we're not navigating
			if (isDetached && !navigating.to) return;

			// Don't interrupt active user scrolling
			if (userScrolling) return;

			scrollToBottom();
		});

		resizeObserver.observe(target);
	};

	// --- Action update logic --------------------------------------------------

	// Returns true (after re-attaching and scrolling) when the dependency's
	// forceReattach counter is greater than the last one seen; false otherwise.
	const handleForceReattach = async (newDependency: MaybeScrollDependency) => {
		const forceReattach = getForceReattach(newDependency);

		if (forceReattach > lastForceReattach) {
			lastForceReattach = forceReattach;
			isDetached = false;
			userScrolling = false;
			clearUserScrollTimeout();

			await tick();
			scrollToBottom();
			return true;
		}

		return false;
	};

	// `update` entry point of the action: decides whether the new dependency
	// value should result in a scroll to the bottom.
	async function updateScroll(newDependency?: MaybeScrollDependency) {
		// 1. Explicit force re-attach
		if (newDependency && (await handleForceReattach(newDependency))) {
			return;
		}

		// 2. Don't scroll if user has detached and we're not navigating
		if (isDetached && !navigating.to) return;

		// 3. Don't scroll if user is actively scrolling
		if (userScrolling) return;

		// 4. Early return if already at bottom and no content change (perf optimization for streaming)
		const currentHeight = node.scrollHeight;
		if (isAtBottom() && currentHeight === lastScrollHeight) {
			return;
		}
		lastScrollHeight = currentHeight;

		// 5. Wait for DOM to update, then scroll and settle after layout shifts
		await tick();
		scrollToBottom();
		await settleScrollAfterLayout();
	}

	// --- Event handlers -------------------------------------------------------

	// Detect user scroll intent via wheel events (mouse/trackpad)
	const handleWheel = (event: WheelEvent) => {
		const { deltaY } = event;

		// User is scrolling up - detach
		if (deltaY < 0) {
			isDetached = true;
		}

		// User is scrolling down - check for re-attachment immediately
		// This ensures fast re-attachment when user scrolls to bottom during fast generation
		if (deltaY > 0 && isAtBottom()) {
			isDetached = false;
			userScrolling = false;
			clearUserScrollTimeout();
			scrollToBottom();
			return;
		}

		scheduleUserScrollEndCheck();
	};

	// Detect user scroll intent via touch events (mobile)
	const handleTouchStart = (event: TouchEvent) => {
		// Remember where the gesture started; 0 when there is no touch point.
		touchStartY = event.touches[0]?.clientY ?? 0;
	};

	const handleTouchMove = (event: TouchEvent) => {
		const touchY = event.touches[0]?.clientY ?? 0;
		// Positive when the finger moved up since the last recorded position.
		const deltaY = touchStartY - touchY;

		// User is scrolling up (finger moving down)
		if (deltaY < -TOUCH_DETACH_THRESHOLD_PX) {
			isDetached = true;
		}

		// User is scrolling down (finger moving up) - check for re-attachment immediately
		if (deltaY > TOUCH_DETACH_THRESHOLD_PX && isAtBottom()) {
			isDetached = false;
			userScrolling = false;
			clearUserScrollTimeout();
			scrollToBottom();
			touchStartY = touchY;
			return;
		}

		scheduleUserScrollEndCheck();
		// Measure the next move relative to this position.
		touchStartY = touchY;
	};

	// Handle scroll events to detect scrollbar usage and re-attach when at bottom
	const handleScroll = () => {
		const now = Date.now();
		const timeSinceLastProgrammaticScroll = now - lastProgrammaticScrollTime;
		// True while a programmatic scroll is in flight or happened less than
		// PROGRAMMATIC_SCROLL_GRACE_MS ago.
		const inGracePeriod =
			isProgrammaticScroll || timeSinceLastProgrammaticScroll < PROGRAMMATIC_SCROLL_GRACE_MS;

		// If not from wheel/touch, this is likely a scrollbar drag
		if (!userScrolling) {
			const scrollingUp = node.scrollTop < prevScrollTop;

			// Always allow detach (scrolling up) - don't ignore user intent
			if (scrollingUp) {
				isDetached = true;
			}

			// Only re-attach when at bottom if NOT in grace period
			// (avoids false re-attach from content resize pushing scroll position)
			if (!inGracePeriod && isAtBottom()) {
				isDetached = false;
				// Immediately scroll to catch up with any content that arrived while detached
				scrollToBottom();
			}
		}

		prevScrollTop = node.scrollTop;
	};

	// --- Setup ----------------------------------------------------------------

	// All listeners are passive: none of the handlers calls preventDefault.
	node.addEventListener("wheel", handleWheel, { passive: true });
	node.addEventListener("touchstart", handleTouchStart, { passive: true });
	node.addEventListener("touchmove", handleTouchMove, { passive: true });
	node.addEventListener("scroll", handleScroll, { passive: true });

	// The sentinel must exist before the IntersectionObserver is set up.
	createSentinel();
	setupIntersectionObserver();
	setupResizeObserver();

	// Initial scroll if we have content
	if (dependency) {
		// Fire-and-forget: wait for the pending DOM update, then scroll.
		void (async () => {
			await tick();
			scrollToBottom();
		})();
	}

	// --- Cleanup --------------------------------------------------------------

	return {
		update: updateScroll,
		destroy: () => {
			clearUserScrollTimeout();

			node.removeEventListener("wheel", handleWheel);
			node.removeEventListener("touchstart", handleTouchStart);
			node.removeEventListener("touchmove", handleTouchMove);
			node.removeEventListener("scroll", handleScroll);

			resizeObserver?.disconnect();
			intersectionObserver?.disconnect();
			sentinel?.remove();
		},
	};
};
/**
 * Renders the chat prompt for `model` and truncates it from the left.
 *
 * When the first message is a system message and a `preprompt` is given, the
 * rendered copy of that message uses `preprompt` as its content. The caller's
 * `messages` array and its message objects are never modified, and an empty
 * `messages` array is accepted.
 *
 * @param messages conversation messages, in order
 * @param model model whose `chatPromptRender` builds the prompt text
 * @param preprompt optional system prompt override
 * @returns the rendered prompt, limited to the last `model.parameters.truncate`
 *   space-separated chunks (no limit when `truncate` is unset or 0, because
 *   `slice(-0)` keeps everything)
 */
export async function buildPrompt({
	messages,
	model,
	preprompt,
}: buildPromptOptions): Promise<string> {
	// Build fresh objects for the renderer instead of writing the preprompt
	// into the caller's first message.
	const renderedMessages = messages.map((m, index) => {
		const overrideSystem = index === 0 && m.from === "system" && Boolean(preprompt);
		return {
			...m,
			...(overrideSystem ? { content: preprompt } : {}),
			role: m.from,
		};
	});

	const prompt = model
		.chatPromptRender({
			messages: renderedMessages,
			preprompt,
		})
		// Not super precise, but it's truncated in the model's backend anyway
		.split(" ")
		.slice(-(model.parameters?.truncate ?? 0))
		.join(" ");

	return prompt;
}
New {title}
{@render children?.()}
================================================ FILE: src/lib/components/BackgroundGenerationPoller.svelte ================================================ ================================================ FILE: src/lib/components/CodeBlock.svelte ================================================
{#if showPreview} {/if}
{@html DOMPurify.sanitize(code)}
{#if previewOpen} (previewOpen = false)} /> {/if}
================================================ FILE: src/lib/components/CopyToClipBoardBtn.svelte ================================================ ================================================ FILE: src/lib/components/DeleteConversationModal.svelte ================================================ {#if open}

Delete conversation

Are you sure you want to delete "{title}"? This action cannot be undone.

{/if} ================================================ FILE: src/lib/components/EditConversationModal.svelte ================================================ {#if open}
{ e.preventDefault(); save(); }} >

Rename conversation

(newTitle = (e.currentTarget as HTMLInputElement).value)} class="w-full rounded-xl border border-gray-200 bg-white px-3 py-2 text-[15px] text-gray-800 outline-none placeholder:text-gray-400 focus:ring-2 focus:ring-gray-200 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-100 dark:placeholder:text-gray-500 dark:focus:ring-gray-700" placeholder="Enter a title" />
{/if} ================================================ FILE: src/lib/components/ExpandNavigation.svelte ================================================ ================================================ FILE: src/lib/components/HoverTooltip.svelte ================================================
{@render children?.()}
================================================ FILE: src/lib/components/HtmlPreviewModal.svelte ================================================ onclose?.()} >
{#if errors.length > 0} {/if}
================================================ FILE: src/lib/components/InfiniteScroll.svelte ================================================
================================================ FILE: src/lib/components/MobileNav.svelte ================================================ {#if isOpen || isDragging} {/if} ================================================ FILE: src/lib/components/Modal.svelte ================================================
{ e.stopPropagation(); handleBackdropClick(e); }} transition:fade|local={{ easing: cubicOut, duration: 300 }} class="fixed inset-0 z-40 flex items-center justify-center bg-black/80 backdrop-blur-sm dark:bg-black/50" > {#if disableFly} {:else} {/if}
================================================ FILE: src/lib/components/ModelCardMetadata.svelte ================================================
Model
 page
{#if model.datasetName || model.datasetUrl} Dataset
 page
{/if} {#if model.hasInferenceAPI} API {/if} {#if model.websiteUrl} {#if model.name.startsWith("meta-llama/Meta-Llama")} Built with Llama {:else} Website {/if} {/if}
================================================ FILE: src/lib/components/NavConversationItem.svelte ================================================ { if (e.detail >= 2) { e.preventDefault(); startInlineEdit(); } }} > {#if inlineEditing} (inlineTitle = (e.currentTarget as HTMLInputElement).value)} onkeydown={(e) => { if (e.key === "Enter") { e.preventDefault(); commitInlineEdit(); } else if (e.key === "Escape") { e.preventDefault(); cancelInlineEdit(); } }} onblur={commitInlineEdit} onclick={(e) => e.preventDefault()} class="my-0 h-full min-w-0 flex-1 truncate border-none bg-transparent p-0 text-inherit outline-none first-letter:uppercase focus:ring-0" /> {:else}
{conv.title}
{/if} {#if !readOnly && !inlineEditing} {/if}
{#if renameOpen} (renameOpen = false)} onsave={(payload) => { renameOpen = false; oneditConversationTitle?.({ id: conv.id.toString(), title: payload.title }); }} /> {/if} {#if deleteOpen} (deleteOpen = false)} ondelete={() => { deleteOpen = false; ondeleteConversation?.(conv.id.toString()); }} /> {/if} ================================================ FILE: src/lib/components/NavMenu.svelte ================================================
{#each Object.entries(groupedConversations) as [group, convs]} {#if convs.length}

{titles[group]}

{#each convs as conv} {/each} {/if} {/each}
{#if hasMore} {/if}
{#if user?.username || user?.email}
{user?.username || user?.email} {#if publicConfig.isHuggingChat && $isPro === false} Get PRO {:else if publicConfig.isHuggingChat && $isPro === true} PRO {/if}
{/if} Models {nModels} {#if user?.username || user?.email} {/if} Settings
{#if showMcpModal} (showMcpModal = false)} /> {/if} ================================================ FILE: src/lib/components/Pagination.svelte ================================================ {#if numTotalPages > 1} {/if} ================================================ FILE: src/lib/components/PaginationArrow.svelte ================================================ {#if direction === "previous"} Previous {:else} Next {/if} ================================================ FILE: src/lib/components/Portal.svelte ================================================ ================================================ FILE: src/lib/components/RetryBtn.svelte ================================================ ================================================ FILE: src/lib/components/ScrollToBottomBtn.svelte ================================================ {#if visible} {/if} ================================================ FILE: src/lib/components/ScrollToPreviousBtn.svelte ================================================ {#if visible} {/if} ================================================ FILE: src/lib/components/ShareConversationModal.svelte ================================================ {#if open}
{#if createdUrl}
Public link created
A public link to your chat has been created.
{:else}
Share public link to chat
Any messages you add after sharing stay private.
{/if} {#if errorMsg}
{errorMsg}
{/if}
{#if createdUrl} { justCopied = true; oncopied?.(); setTimeout(() => (justCopied = false), 1200); }} > {#snippet children()} {#if justCopied} Copied {:else} Copy link {/if} {/snippet} {:else} {/if}
{/if} ================================================ FILE: src/lib/components/StopGeneratingBtn.svelte ================================================ ================================================ FILE: src/lib/components/SubscribeModal.svelte ================================================
{#if $isPro} {:else} {/if}

{$isPro ? "Out of Credits" : "Upgrade Required"}

{#if $isPro}

You've used all your available credits. Purchase additional credits to continue using HuggingChat.

Your credits can be used in other HF services and external apps via Inference Providers.

{:else}

You've reached your message limit. Upgrade to Hugging Face PRO to continue using HuggingChat.

It's also possible to use your PRO credits in your favorite AI tools.

{/if}
{#if $isPro} Purchase Credits {:else} Upgrade to Pro {/if}
================================================ FILE: src/lib/components/Switch.svelte ================================================
================================================ FILE: src/lib/components/SystemPromptModal.svelte ================================================ {#if isOpen} (isOpen = false)} width="w-full !max-w-xl">

System Prompt

{/if} ================================================ FILE: src/lib/components/Toast.svelte ================================================

{message}

================================================ FILE: src/lib/components/Tooltip.svelte ================================================
{label}
================================================ FILE: src/lib/components/WelcomeModal.svelte ================================================
Omni AI model router animation
Now with MCP!

Welcome to {publicConfig.PUBLIC_APP_NAME}, the chat app powered by open source AI models.

Omni automatically picks the best AI model to give you optimal answers depending on your requests.

You can also choose from any available open source models to chat with directly.

================================================ FILE: src/lib/components/chat/Alternatives.svelte ================================================
{currentIdx + 1} / {alternatives.length}
================================================ FILE: src/lib/components/chat/BlockWrapper.svelte ================================================
{@render icon()} {#if loading} {/if}
{#if hasNext}
{/if}
{@render children()}
================================================ FILE: src/lib/components/chat/ChatInput.svelte ================================================
{#if !showNoTools}
{#if showFileUpload}
{ if (requireAuthUser()) { e.preventDefault(); } }} accept={mimeTypes.join(",")} /> { if (open && requireAuthUser()) { isDropdownOpen = false; return; } isDropdownOpen = open; }} > e.preventDefault()} interactOutsideBehavior="defer-otherwise-close" > {#if modelIsMultimodal} openFilePickerImage()} > Add image(s) {/if}
Add text file
e.preventDefault()} interactOutsideBehavior="defer-otherwise-close" > openFilePickerText()} > Upload from device (isUrlModalOpen = true)} > Fetch from URL
MCP Servers
e.preventDefault()} interactOutsideBehavior="defer-otherwise-close" > {#each $allMcpServers as server (server.id)} toggleServer(server.id)} closeOnSelect={false} class="flex h-9 select-none items-center gap-2 rounded-md px-2 text-sm leading-none text-gray-800 data-[highlighted]:bg-gray-100 focus-visible:outline-none dark:text-gray-100 dark:data-[highlighted]:bg-white/10" > {#snippet children({ checked })} {server.name}
{/snippet}
{/each} {#if $allMcpServers.length > 0} {/if} (isMcpManagerOpen = true)} > Manage MCP Servers
{#if $enabledServersCount > 0}
{/if}
{/if}
{/if} {@render children?.()} {#if isMcpManagerOpen} (isMcpManagerOpen = false)} /> {/if}
================================================ FILE: src/lib/components/chat/ChatIntroduction.svelte ================================================
{publicConfig.PUBLIC_APP_NAME}
================================================ FILE: src/lib/components/chat/ChatMessage.svelte ================================================ {#if message.from === "assistant"} {#if lightboxSrc} (lightboxSrc = null)} /> {/if} {/if} {#if message.from === "user"} {/if} ================================================ FILE: src/lib/components/chat/ChatWindow.svelte ================================================ { e.preventDefault(); }} ondrop={(e) => { e.preventDefault(); onDrag = false; }} />
{#if shareModalOpen} shareModal.close()} /> {/if}
{#if preprompt && preprompt != currentModel.preprompt} {/if} {#if messages.length > 0}
{#each messages as message, idx (message.id)} a.includes(message.id)) ?? []} isAuthor={!shared} readOnly={isReadOnly} isLast={idx === messages.length - 1} bind:editMsdgId onretry={(payload) => onretry?.(payload)} onshowAlternateMsg={(payload) => onshowAlternateMsg?.(payload)} /> {/each} {#if isReadOnly} {/if}
{:else if pending} {:else} { onmessage?.(content); }} /> {/if}
{#if !draft.length && !messages.length && !sources.length && !loading && (currentModel.isRouter || (modelSupportsTools && $allBaseServersEnabled)) && activeExamples.length && !hideRouterExamples && !lastIsError && $mcpServersLoaded}
{#each activeExamples as ex} {/each}
{/if} {#if shouldShowRouterFollowUps && !lastIsError}
{#each routerFollowUps as followUp} {/each}
{/if} {#if sources?.length && !loading}
{#each sources as source, index} {#await source then src} { files = files.filter((_, i) => i !== index); }} /> {/await} {/each}
{/if}
{#if !loading && lastIsError} { if (lastMessage && lastMessage.ancestors) { onretry?.({ id: lastMessage.id, }); } }} /> {/if}
{ e.preventDefault(); handleSubmit(); }} class={{ "relative flex w-full max-w-4xl flex-1 items-center rounded-xl border bg-gray-100 dark:border-gray-700 dark:bg-gray-800": true, "opacity-30": isReadOnly, "max-sm:mb-4": focused && isVirtualKeyboard(), }} > {#if isRecording || isTranscribing} { isRecording = false; }} onconfirm={handleRecordingConfirm} onsend={handleRecordingSend} onerror={handleRecordingError} /> {:else if onDrag && isFileUploadEnabled} {:else}
{#if lastIsError} {:else} {/if} {#if loading} { hapticError(); onstop?.(); }} showBorder={true} classNames="absolute bottom-2 right-2 size-8 sm:size-7 self-end rounded-full border bg-white text-black shadow transition-none dark:border-transparent dark:bg-gray-600 dark:text-white" /> {:else} {#if transcriptionEnabled} {/if} {/if}
{/if}
{#if models.find((m) => m.id === currentModel.id)} {#if loading && streamingToolCallName} Calling tool {availableTools.find((t) => t.name === streamingToolCallName)?.displayName ?? streamingToolCallName} {:else if !currentModel.isRouter || !loading} { if (requireAuthUser()) { e.preventDefault(); } }} class="inline-flex items-center gap-1 hover:underline" > {#if currentModel.isRouter} {currentModel.displayName} {:else} Model: {currentModel.displayName} {#if hasProviderOverride} {@const hubOrg = PROVIDERS_HUB_ORGS[providerOverride as keyof typeof PROVIDERS_HUB_ORGS]} {#if providerOverride === "fastest"} {:else if providerOverride === "cheapest"} {:else if hubOrg} {providerOverride} {/if} {/if} {/if} {:else if showRouterDetails && streamingRouterMetadata?.route}
{streamingRouterMetadata.route} with {streamingRouterModelName}
{:else}
Routing
{/if} {:else} {currentModel.id} {/if} {#if !messages.length && !loading} Generated content may be inaccurate or false. {/if}
================================================ FILE: src/lib/components/chat/FileDropzone.svelte ================================================
(onDragInner = true)} ondragleave={() => (onDragInner = false)} ondragover={(e) => { e.preventDefault(); }} class="relative flex h-28 w-full max-w-4xl flex-col items-center justify-center gap-1 rounded-xl border-2 border-dotted {onDragInner ? 'border-blue-200 !bg-blue-600/10 text-blue-600 *:pointer-events-none dark:border-blue-600 dark:bg-blue-600/20 dark:text-blue-600' : 'bg-gray-100 text-gray-500 dark:border-gray-500 dark:bg-gray-700 dark:text-gray-400'}" >

Drop File to add to chat

================================================ FILE: src/lib/components/chat/ImageLightbox.svelte ================================================
e.stopPropagation()} />
================================================ FILE: src/lib/components/chat/MarkdownBlock.svelte ================================================ {#each renderedTokens as token} {#if token.type === "text"} {@html token.html} {:else if token.type === "code"} {/if} {/each} ================================================ FILE: src/lib/components/chat/MarkdownRenderer.svelte ================================================ {#each blocks as block, index (loading && index === blocks.length - 1 ? `stream-${index}` : block.id)} {/each} ================================================ FILE: src/lib/components/chat/MarkdownRenderer.svelte.test.ts ================================================
import MarkdownRenderer from "./MarkdownRenderer.svelte";
import { render } from "vitest-browser-svelte";
import { page } from "@vitest/browser/context";
import { describe, expect, it } from "vitest";

/**
 * Browser-mode rendering tests for MarkdownRenderer.
 *
 * Each case mounts the component with a markdown `content` string and then
 * inspects the resulting DOM, either through `page` locators (role/text
 * queries) or through the `baseElement` returned by `render` (for the
 * `.katex` class lookups).
 */
describe("MarkdownRenderer", () => {
	it("renders", () => {
		render(MarkdownRenderer, { content: "Hello, world!" });
		expect(page.getByText("Hello, world!")).toBeInTheDocument();
	});

	it("renders headings", () => {
		render(MarkdownRenderer, { content: "# Hello, world!" });
		expect(page.getByRole("heading", { level: 1 })).toBeInTheDocument();
	});

	it("renders links", () => {
		render(MarkdownRenderer, { content: "[Hello, world!](https://example.com)" });
		const link = page.getByRole("link", { name: "Hello, world!" });
		expect(link).toBeInTheDocument();
		expect(link).toHaveAttribute("href", "https://example.com");
		// Rendered links are expected to open in a new tab without leaking the referrer.
		expect(link).toHaveAttribute("target", "_blank");
		expect(link).toHaveAttribute("rel", "noreferrer");
	});

	it("renders inline codespans", () => {
		render(MarkdownRenderer, { content: "`foobar`" });
		expect(page.getByRole("code")).toHaveTextContent("foobar");
	});

	it("renders block codes", () => {
		render(MarkdownRenderer, { content: "```foobar```" });
		expect(page.getByRole("code")).toHaveTextContent("foobar");
	});

	it("doesnt render raw html directly", () => {
		// NOTE(review): the input and the expected paragraph text are both empty
		// strings here; confirm they match the intended raw-HTML fixture.
		render(MarkdownRenderer, { content: "" });
		// `elements` is a locator method: it must be called to obtain the matched
		// elements. Without the call the assertion would measure the function's own
		// `length` and pass no matter what was rendered.
		expect(page.getByRole("button").elements()).toHaveLength(0);
		// htmlparser2 escapes disallowed tags
		expect(page.getByRole("paragraph")).toHaveTextContent("");
	});

	it("renders latex", () => {
		const { baseElement } = render(MarkdownRenderer, { content: "$(oo)^2$" });
		expect(baseElement.querySelectorAll(".katex")).toHaveLength(1);
	});

	it("does not render latex in code blocks", () => {
		const { baseElement } = render(MarkdownRenderer, { content: "```\n$(oo)^2$\n```" });
		expect(baseElement.querySelectorAll(".katex")).toHaveLength(0);
	});

	it("does not render latex in inline codes", () => {
		const { baseElement } = render(MarkdownRenderer, { content: "`$oo` and `$bar`" });
		expect(baseElement.querySelectorAll(".katex")).toHaveLength(0);
	});

	it("does not render latex across multiple lines", () => {
		// Each `$` sits on its own list item, so no single line has a closed pair.
		const { baseElement } = render(MarkdownRenderer, { content: "* $oo \n* $aa" });
		expect(baseElement.querySelectorAll(".katex")).toHaveLength(0);
	});

	it("renders latex with some < and > symbols", () => {
		const { baseElement } = render(MarkdownRenderer, { content: "$foo < bar > baz$" });
		expect(baseElement.querySelectorAll(".katex")).toHaveLength(1);
	});
});
================================================ FILE: src/lib/components/chat/MessageAvatar.svelte ================================================ ================================================ FILE: 
src/lib/components/chat/ModelSwitch.svelte ================================================
This model is no longer available. Switch to a new one to continue this conversation:
================================================ FILE: src/lib/components/chat/OpenReasoningResults.svelte ================================================ {#snippet icon()} {/snippet} ================================================ FILE: src/lib/components/chat/ToolUpdate.svelte ================================================ {#snippet icon()} {#if toolSuccess} {:else} {/if} {/snippet} {#if toolFnName}
{#if isOpen}
{#each tool as update, i (`${update.subtype}-${i}`)} {#if update.subtype === MessageToolUpdateType.Call}
Input
{formatValue(
										update.call.parameters
									)}
{:else if update.subtype === MessageToolUpdateType.Error}
Error
{update.message}
{:else if isMessageToolResultUpdate(update) && update.result.status === ToolResultStatus.Success && update.result.display}
Output
{#each parseToolOutputs(update.result.outputs) as parsedOutput}
{#if parsedOutput.text}
{parsedOutput.text}
{/if} {#if parsedOutput.images.length > 0}
{#each parsedOutput.images as image, imageIndex} {`Tool {/each}
{/if} {#if parsedOutput.metadata.length > 0}
{formatValue(
													Object.fromEntries(parsedOutput.metadata)
												)}
{/if}
{/each}
{:else if isMessageToolResultUpdate(update) && update.result.status === ToolResultStatus.Error && update.result.display}
Error
{update.result
										.message}
{/if} {/each}
{/if}
{/if} ================================================ FILE: src/lib/components/chat/UploadedFile.svelte ================================================ {#if showModal && isClickable} (showModal = false)}> {#if isImage(file.mime)} {#if file.type === "hash"} input from user {:else} input from user {/if} {:else if isPlainText(file.mime)}

{file.name}

{#if file.mime === "application/vnd.chatui.clipboard"}

If you prefer to inject clipboard content directly in the chat, you can disable this feature in the settings page.

{/if} {#if file.type === "hash"} {#await fetch(urlNotTrailing + "/output/" + file.value).then((res) => res.text())}
{:then result}
{result}
{/await} {:else}
{atob(file.value)}
{/if}
{/if}
{/if}
isClickable && (showModal = true)} onkeydown={(e) => { if (!isClickable) { return; } if (e.key === "Enter" || e.key === " ") { showModal = true; } }} class:clickable={isClickable} role="button" tabindex="0" >
{#if isImage(file.mime)}
{file.name}
{:else if isAudio(file.mime)} {:else if isVideo(file.mime)}
{:else if isPlainText(file.mime)}
{truncateMiddle(file.name, 28)}
{#if file.mime === "application/vnd.chatui.clipboard"}
Clipboard source
{:else}
{file.mime}
{/if}
{:else if file.mime === "application/octet-stream"}
{truncateMiddle(file.name, 28)}
File type could not be determined
{:else}
{truncateMiddle(file.name, 28)}
{file.mime}
{/if} {#if canClose} {/if}
================================================ FILE: src/lib/components/chat/UrlFetchModal.svelte ================================================ {#if open} {#snippet children()}
{ e.preventDefault(); handleSubmit(); }} >

Add from URL

{ if (e.key === "Enter") { e.preventDefault(); handleSubmit(); } }} />
{#if errorMsg}

{errorMsg}

{/if}

Only HTTPS. Max 10MB.

{/snippet}
{/if} ================================================ FILE: src/lib/components/chat/VoiceRecorder.svelte ================================================
{#if isTranscribing}
{:else} {/if}
================================================ FILE: src/lib/components/icons/IconBurger.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconCheap.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconChevron.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconDazzled.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconFast.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconLoading.svelte ================================================
================================================ FILE: src/lib/components/icons/IconMCP.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconMoon.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconNew.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconOmni.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconPaperclip.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconPro.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconShare.svelte ================================================ ================================================ FILE: src/lib/components/icons/IconSun.svelte ================================================ ================================================ FILE: src/lib/components/icons/Logo.svelte ================================================ {publicConfig.PUBLIC_APP_NAME} logo ================================================ FILE: src/lib/components/icons/LogoHuggingFaceBorderless.svelte ================================================ ================================================ FILE: src/lib/components/mcp/AddServerForm.svelte ================================================
HTTP Headers (Optional)
{#if headers.length === 0}

No headers configured

{:else} {#each headers as header, i}
{#if isSensitiveHeader(header.key)} {/if}
{/each} {/if}

Common examples:
• Bearer token: Authorization: Bearer YOUR_TOKEN
• API key: X-API-Key: YOUR_KEY

Be careful with custom MCP servers.

They receive your requests (including conversation context and any headers you add) and can run powerful tools on your behalf. Only add servers you trust and review their source. Never share confidential information.

{#if error}

{error}

{/if}
================================================ FILE: src/lib/components/mcp/MCPServerManager.svelte ================================================

{#if currentView === "list"} MCP Servers {:else} Add MCP server {/if}

{#if currentView === "list"} Manage MCP servers to extend {publicConfig.PUBLIC_APP_NAME} with external tools. {:else} Add a custom MCP server to {publicConfig.PUBLIC_APP_NAME}. {/if}

{#if currentView === "list"}

{$allMcpServers.length} {$allMcpServers.length === 1 ? "server" : "servers"} configured

{enabledCount} enabled

{#if baseServers.length > 0}

Base Servers ({baseServers.length})

{#each baseServers as server (server.id)} {/each}
{/if}

Custom Servers ({customServers.length})

{#if customServers.length === 0}

No custom servers yet

Add your own MCP servers with custom tools

{:else}
{#each customServers as server (server.id)} {/each}
{/if}

💡 Quick Tips

  • • Only connect to servers you trust
  • • Enable servers to make their tools available in chat
  • • Use the Health Check button to verify server connectivity
  • • You can add HTTP headers for authentication when required
{:else if currentView === "add"} {/if}
================================================ FILE: src/lib/components/mcp/ServerCard.svelte ================================================

{server.name}

{server.url}

isSelected, setEnabled} />
{#if server.status}
{#if server.status === "connected"} {:else if server.status === "connecting"} {:else if server.status === "error"} {:else} {/if} {statusInfo.label} {#if server.tools && server.tools.length > 0} {server.tools.length} {server.tools.length === 1 ? "tool" : "tools"} {/if}
{/if} {#if server.errorMessage}
{server.errorMessage}
{/if}
{#if isHfMcp} Settings {/if} {#if server.type === "custom"} {/if}
{#if server.tools && server.tools.length > 0}
Available Tools ({server.tools.length})
    {#each server.tools as tool}
  • {tool.name} {#if tool.description} - {tool.description} {/if}
  • {/each}
{/if}
================================================ FILE: src/lib/components/players/AudioPlayer.svelte ================================================
{name}
{#if duration !== Infinity}
{format(time)}
{ paused = true; }} onpointerup={seek} >
{duration ? format(duration) : "--:--"}
{/if}
================================================ FILE: src/lib/components/voice/AudioWaveform.svelte ================================================
{#each timeline as height, i (i)}
{/each}
================================================ FILE: src/lib/constants/mcpExamples.ts ================================================
import type { RouterExample } from "./routerExamples";

// Examples that showcase MCP tool capabilities (web search, Hugging Face, etc.)
//
// Every entry follows the RouterExample shape imported above: a short `title`,
// the `prompt` text, and an optional `followUps` list whose items are
// themselves `{ title, prompt }` pairs.
export const mcpExamples: RouterExample[] = [
	// The only entry without follow-up suggestions.
	{
		title: "Generate an image",
		prompt: "Generate an image of a zebra in front of a volcanic eruption",
	},
	{
		title: "Latest world news",
		prompt: "What is the latest world news?",
		followUps: [
			{
				title: "Tech focus",
				prompt: "What about technology news?",
			},
			{
				title: "San Francisco",
				prompt: "What's happening in San Francisco?",
			},
			{
				title: "vs last week",
				prompt: "How does this compare to last week's news?",
			},
		],
	},
	{
		title: "Trending models",
		prompt: "What are the top trending models on Hugging Face?",
		followUps: [
			{
				title: "Text generation",
				prompt: "What about text generation models?",
			},
			{
				title: "Image generation",
				prompt: "What about text-to-image models?",
			},
			{
				title: "How to use",
				prompt: "Show me how to use the most popular one",
			},
		],
	},
	{
		title: "Plan a trip",
		prompt: "Things to do in Tokyo next week",
		followUps: [
			{
				title: "Transport & prices",
				prompt: "How do I get around and how much will it cost?",
			},
			{
				title: "Weather",
				prompt: "What's the weather like in Tokyo next week?",
			},
			{
				title: "Meet people",
				prompt: "Where can I meet new people and make friends?",
			},
		],
	},
	// The prompts from here on (except "Find a dataset") open with "Search ...".
	{
		title: "Compare technologies",
		prompt: "Search the web to compare React, Vue, and Svelte for building web apps in 2025",
		followUps: [
			{
				title: "Performance benchmarks",
				prompt: "Search for recent performance benchmarks comparing these frameworks",
			},
			{
				title: "Job market",
				prompt: "Search for job market trends for each of these frameworks",
			},
			{
				title: "Migration guides",
				prompt: "Search for guides on migrating from React to Svelte",
			},
		],
	},
	{
		title: "Find a dataset",
		prompt: "Find datasets on Hugging Face for training a sentiment analysis model",
		followUps: [
			{
				title: "Dataset details",
				prompt: "Tell me more about the largest dataset - its size, format, and how to load it",
			},
			{
				title: "Find models",
				prompt: "Find pre-trained models that were trained on this dataset",
			},
			{
				title: "Code snippet",
				prompt: "Show me how to load and preprocess this dataset with the datasets library",
			},
		],
	},
	{
		title: "Gift ideas",
		prompt: "Search for unique gift ideas for someone who loves cooking",
		followUps: [
			{
				title: "Budget options",
				prompt: "Search for gift ideas under $50",
			},
			{
				title: "Top rated",
				prompt: "Search for the top-rated cooking gadgets of this year",
			},
			{
				title: "DIY gifts",
				prompt: "Search for homemade gift ideas for cooking enthusiasts",
			},
		],
	},
	{
		title: "Learn something new",
		prompt: "Search for the best resources to learn Rust programming in 2025",
		followUps: [
			{
				title: "Project ideas",
				prompt: "Search for beginner Rust project ideas to practice with",
			},
			{
				title: "Find tools",
				prompt: "Search for the most popular Rust tools and libraries I should know about",
			},
			{
				title: "Community",
				prompt: "Search for Rust communities and forums where I can ask questions",
			},
		],
	},
];
================================================ FILE: src/lib/constants/mime.ts ================================================
// Centralized MIME allowlists used across client and server
// Keep these lists minimal and consistent with server processing.
// MIME patterns treated as text; note that the first entry is the `text/*` wildcard.
export const TEXT_MIME_ALLOWLIST = [
	"text/*",
	"application/json",
	"application/xml",
	"application/csv",
] as const;

// Default image allowlist: JPEG and PNG only.
export const IMAGE_MIME_ALLOWLIST_DEFAULT = ["image/jpeg", "image/png"] as const;
================================================ FILE: src/lib/constants/pagination.ts ================================================
// Presumably the number of conversations loaded per page - confirm against callers.
export const CONV_NUM_PER_PAGE = 30;
================================================ FILE: src/lib/constants/publicSepToken.ts ================================================
// NOTE(review): the value is an empty string here; confirm it was not lost when
// this file was exported (a tag-like token would have been stripped).
export const PUBLIC_SEP_TOKEN = "";
================================================ FILE: src/lib/constants/routerExamples.ts ================================================
/** A follow-up suggestion attached to an example: a short title plus the prompt to send. */
export type RouterFollowUp = {
	title: string;
	prompt: string;
};

/** An attachment for an example, identified by its `src` string. */
export type RouterExampleAttachment = {
	src: string;
};

/** A starter example: title and prompt, with optional follow-ups and attachments. */
export type RouterExample = {
	title: string;
	prompt: string;
	followUps?: RouterFollowUp[];
	attachments?: RouterExampleAttachment[];
};

// Built-in starter examples. Most entries list `followUps`; the "Weird painting"
// entry instead carries an `attachments` list.
export const routerExamples: RouterExample[] = [
	{
		title: "HTML game",
		prompt: "Code a minimal Flappy Bird game using HTML and Canvas",
		followUps: [
			{
				title: "README.md file",
				prompt: "Create a comprehensive README.md for the Flappy Bird game project.",
			},
			{
				title: "CRT Screen",
				prompt: "Add a CRT screen effect to the game",
			},
			{
				title: "Add power-ups",
				prompt: "Add collectible coins between pipes that award bonus points and a shield power-up that allows one collision.",
			},
			{
				title: "Explain collision detection",
				prompt: "Explain the collision detection algorithm for the bird and pipes in simple terms with examples.",
			},
		],
	},
	{
		title: "Weird painting",
		prompt: "is this a real painting?",
		attachments: [
			{
				src: "huggingchat/castle-example.jpg",
			},
		],
	},
	{
		title: "Landing page",
		prompt: "Build a responsive SaaS landing page for my AI coding assitant using Tailwind CSS. With a hero, features, testimonials, and pricing sections.",
		followUps: [
			{
				title: "Dark mode",
				prompt: "Add dark mode and make it the default",
			},
			{
				title: "Write blog post",
				prompt: "Write a blog post introducing my service.",
			},
			{
				title: "Translate to Italian",
				prompt: "Translate only the text content displayed to users into Italian.",
			},
			{
				title: "Architecture review",
				prompt: "Review the architecture and suggest improvements for scalability, SEO optimization, and performance.",
			},
		],
	},
	{
		title: "Eminem song",
		prompt: "Write an Eminem-style rap battling AI taking over hip-hop, with two energetic verses and a catchy hook.",
		followUps: [
			{
				title: "Psychological analysis",
				prompt: "Provide a psychological analysis of Eminem's emotions in this song.",
			},
			{
				title: "Wired Article",
				prompt: "Write an article in the style of Wired explaining this Eminem release.",
			},
			{
				title: "Roleplay",
				prompt: "Roleplay as Eminem so I can discuss the song with him.",
			},
			{
				title: "Translate to Spanish",
				prompt: "Translate the rap lyrics to Spanish while maintaining the rhyme scheme and flow.",
			},
		],
	},
	{
		title: "Act as Yoda",
		prompt: "Act as Yoda",
		followUps: [
			{
				title: "Give advice",
				prompt: "Continue acting as Yoda and offer three pieces of life advice for staying focused under pressure.",
			},
			{
				title: "Explain the Force",
				prompt: "In Yoda's voice, explain the concept of the Force to a young padawan using modern language.",
			},
			{
				title: "Plain English",
				prompt: "Rewrite the previous response from Yoda into plain English while keeping the same meaning.",
			},
			{
				title: "Compare philosophies",
				prompt: "Compare Yoda's Jedi philosophy to Stoic philosophy from ancient Greece and explain the similarities and differences.",
			},
		],
	},
	{
		title: "Generate prompts",
		prompt: `Generate 5 creative prompts Text-to-image prompts like: "Cyberpunk cityscape at night, neon lights, flying cars, rain-slicked streets, blade runner aesthetic, highly detailed`,
		followUps: [
			{
				title: "Turn into JSON",
				prompt: `Generate a detailed JSON object for each prompt. Include fields for subjects (list of objects), scene (setting, environment, background details), actions (what's happening), style (artistic style or medium)`,
			},
			{
				title: "Sci-fi portraits",
				prompt: "Produce five futuristic character portrait prompts with unique professions and settings.",
			},
			{
				title: "Explain image generation",
				prompt: "Explain how text-to-image diffusion models work, covering the denoising process and how text prompts guide generation.",
			},
		],
	},
	{
		title: "Explain LLMs",
		prompt: "Explain how large language models based on transformers work, covering attention, embeddings, and training objectives.",
		followUps: [
			{
				title: "Generate a Quiz",
				prompt: "Craft a 5-question multiple-choice quiz to validate what I learned.",
			},
			{
				title: "Compare to RNNs",
				prompt: "Compare transformer-based large language models to recurrent neural networks, focusing on training efficiency and capabilities.",
			},
			{
				title: "Student summary",
				prompt: "Summarize the explanation of large language models for a high school student using relatable analogies.",
			},
			{
				title: "Write a blog post",
				prompt: "Write a blog post about how transformers revolutionized NLP, targeting software engineers who are new to AI.",
			},
		],
	},
	{
		title: "Translate in Italian",
		prompt: `Translate in Italian: Some are born great, some achieve greatness, and some have greatness thrust upon 'em`,
		followUps: [
			{
				title: "Back to English",
				prompt: "Translate the Italian version back into English while keeping Shakespeare's tone intact.",
			},
			{
				title: "Explain choices",
				prompt: "Explain your translation choices for each key phrase from the Italian version.",
			},
			{
				title: "Modernize",
				prompt: "Modernize the Italian translation into contemporary informal Italian suitable for social media.",
			},
			{
				title: "Teach me Italian",
				prompt: "Help me practice Italian by conversing about this Shakespeare quote, correcting my grammar when needed.",
			},
		],
	},
	{
		title: "Pelican on a
bicycle",
		prompt: "Draw an SVG of a pelican riding a bicycle",
		followUps: [
			{
				title: "Add a top hat",
				prompt: "Add a fancy top hat to the pelican and make it look distinguished",
			},
			{
				title: "Make it animated",
				prompt: "Add CSS animations to make the bicycle wheels spin and the pelican's wings flap",
			},
		],
	},
];
================================================ FILE: src/lib/createShareLink.ts ================================================
import { base } from "$app/paths";
import { page } from "$app/state";

/**
 * Returns a public share URL for a conversation id.
 * If `id` is already a 7-char share id, no network call is made.
 *
 * The URL prefix is `PUBLIC_SHARE_PREFIX` when set, otherwise
 * `PUBLIC_ORIGIN` (or the current page origin) followed by the app base path.
 *
 * @param id - a conversation id, or an existing 7-character share id
 * @returns the share URL, `<prefix>/r/<shareId>`
 * @throws Error when the share request fails or the response has no `shareId`
 */
export async function createShareLink(id: string): Promise<string> {
	const SHARE_ID_LENGTH = 7;

	const prefix =
		page.data.publicConfig.PUBLIC_SHARE_PREFIX ||
		`${page.data.publicConfig.PUBLIC_ORIGIN || page.url.origin}${base}`;

	if (id.length === SHARE_ID_LENGTH) {
		return `${prefix}/r/${id}`;
	}

	const res = await fetch(`${base}/conversation/${id}/share`, {
		method: "POST",
		headers: { "Content-Type": "application/json" },
	});

	if (!res.ok) {
		// The body is only used as an error message, so an unreadable body falls back to the default text
		const text = await res.text().catch(() => "");
		throw new Error(text || "Failed to create share link");
	}

	const { shareId } = await res.json();
	if (!shareId) {
		// Without this guard a malformed response would yield a ".../r/undefined" link
		throw new Error("Failed to create share link");
	}

	return `${prefix}/r/${shareId}`;
}
================================================ FILE: src/lib/jobs/refresh-conversation-stats.ts ================================================
import type { ConversationStats } from "$lib/types/ConversationStats";
import { CONVERSATION_STATS_COLLECTION, collections } from "$lib/server/database";
import { logger } from "$lib/server/logger";
import type { ObjectId } from "mongodb";
import { acquireLock, refreshLock } from "$lib/migrations/lock";
import { Semaphores } from "$lib/types/Semaphore";

/**
 * Returns the `date.at` of the stored stat with the greatest `date.at`,
 * or the Unix epoch when no stat has been stored yet.
 */
async function getLastComputationTime(): Promise<Date> {
	const lastStats = await collections.conversationStats.findOne({}, { sort: { "date.at": -1 } });
	return lastStats?.date?.at || new Date(0);
}

/** Resolves to true when the latest stored stat is dated more than 24 hours ago. */
async function shouldComputeStats(): Promise<boolean> {
	const lastComputationTime = await
getLastComputationTime();
	const oneDayAgo = new Date(Date.now() - 24 * 3_600_000);
	return lastComputationTime < oneDayAgo;
}

/**
 * Starts the three stat computations (conversation/updatedAt, conversation/createdAt,
 * message/createdAt) for each of the day, week and month spans.
 * The computations are started without being awaited; each one logs its own failure.
 */
export async function computeAllStats() {
	for (const span of ["day", "week", "month"] as const) {
		computeStats({ dateField: "updatedAt", type: "conversation", span }).catch((e) =>
			logger.error(e, "Error computing conversation stats for updatedAt")
		);
		computeStats({ dateField: "createdAt", type: "conversation", span }).catch((e) =>
			logger.error(e, "Error computing conversation stats for createdAt")
		);
		computeStats({ dateField: "createdAt", type: "message", span }).catch((e) =>
			logger.error(e, "Error computing message stats for createdAt")
		);
	}
}

/**
 * Aggregates conversations into per-span counts and merges them into the stats collection.
 *
 * Only documents dated at or after the `date.at` of the latest stored stat for the same
 * field/span/type are considered (everything when none is stored). For each truncated date
 * the pipeline emits four rows: distinct `userId`, distinct `sessionId`, distinct
 * `userId`-or-`sessionId`, and the plain count (`distinct: "_id"`).
 *
 * Does nothing when the semaphores collection reports two or fewer indexes.
 */
async function computeStats(params: {
	dateField: ConversationStats["date"]["field"];
	span: ConversationStats["date"]["span"];
	type: ConversationStats["type"];
}) {
	const indexes = await collections.semaphores.listIndexes().toArray();
	if (indexes.length <= 2) {
		logger.info("Indexes not created, skipping stats computation");
		return;
	}

	const lastComputed = await collections.conversationStats.findOne(
		{ "date.field": params.dateField, "date.span": params.span, type: params.type },
		{ sort: { "date.at": -1 } }
	);

	// If the last computed week is at the beginning of the last computed month, we need to include some days from the previous month
	// In those cases we need to compute the stats from before the last month as everything is one aggregation
	const minDate = lastComputed ? lastComputed.date.at : new Date(0);

	logger.debug(
		{ minDate, dateField: params.dateField, span: params.span, type: params.type },
		"Computing conversation stats"
	);

	// Message dates live inside the `messages` array, conversation dates at the top level
	const dateField = params.type === "message" ? "messages." + params.dateField : params.dateField;

	const pipeline = [
		{
			$match: {
				[dateField]: { $gte: minDate },
			},
		},
		// For message stats: use $filter to reduce data before $unwind (optimization)
		// For conversation stats: simple projection
		...(params.type === "message"
			? [
					{
						$project: {
							// Filter messages by date, then map to only keep the date field
							// This avoids carrying large message payloads (content, files, etc.) through the pipeline
							messages: {
								$map: {
									input: {
										$filter: {
											input: "$messages",
											as: "msg",
											cond: { $gte: [`$$msg.${params.dateField}`, minDate] },
										},
									},
									as: "msg",
									in: { [params.dateField]: `$$msg.${params.dateField}` },
								},
							},
							sessionId: 1,
							userId: 1,
						},
					},
					{
						$unwind: "$messages",
					},
				]
			: [
					{
						$project: {
							[dateField]: 1,
							sessionId: 1,
							userId: 1,
						},
					},
				]),
		{
			$sort: {
				[dateField]: 1,
			},
		},
		{
			$facet: {
				userId: [
					{
						$match: {
							userId: { $exists: true },
						},
					},
					{
						$group: {
							_id: {
								at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
								userId: "$userId",
							},
						},
					},
					{
						$group: {
							_id: "$_id.at",
							count: { $sum: 1 },
						},
					},
					{
						$project: {
							_id: 0,
							date: {
								at: "$_id",
								field: params.dateField,
								span: params.span,
							},
							distinct: "userId",
							count: 1,
						},
					},
				],
				sessionId: [
					{
						$match: {
							sessionId: { $exists: true },
						},
					},
					{
						$group: {
							_id: {
								at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
								sessionId: "$sessionId",
							},
						},
					},
					{
						$group: {
							_id: "$_id.at",
							count: { $sum: 1 },
						},
					},
					{
						$project: {
							_id: 0,
							date: {
								at: "$_id",
								field: params.dateField,
								span: params.span,
							},
							distinct: "sessionId",
							count: 1,
						},
					},
				],
				userOrSessionId: [
					{
						$group: {
							_id: {
								at: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
								userOrSessionId: { $ifNull: ["$userId", "$sessionId"] },
							},
						},
					},
					{
						$group: {
							_id: "$_id.at",
							count: { $sum: 1 },
						},
					},
					{
						$project: {
							_id: 0,
							date: {
								at: "$_id",
								field: params.dateField,
								span: params.span,
							},
							distinct: "userOrSessionId",
							count: 1,
						},
					},
				],
				_id: [
					{
						$group: {
							_id: { $dateTrunc: { date: `$${dateField}`, unit: params.span } },
							count: { $sum: 1 },
						},
					},
					{
						$project: {
							_id: 0,
							date: {
								at: "$_id",
								field: params.dateField,
								span: params.span,
							},
							distinct: "_id",
							count: 1,
						},
					},
				],
			},
		},
		{
			$project: {
				stats: {
					$concatArrays: ["$userId", "$sessionId", "$userOrSessionId", "$_id"],
				},
			},
		},
		{
			$unwind: "$stats",
		},
		{
			$replaceRoot: {
				newRoot: "$stats",
			},
		},
		{
			$set: {
				type: params.type,
			},
		},
		{
			$merge: {
				into: CONVERSATION_STATS_COLLECTION,
				on: ["date.at", "type", "date.span", "date.field", "distinct"],
				whenMatched: "replace",
				whenNotMatched: "insert",
			},
		},
	];

	await collections.conversations.aggregate(pipeline, { allowDiskUse: true }).next();

	logger.debug(
		{ minDate, dateField: params.dateField, span: params.span, type: params.type },
		"Computed conversation stats"
	);
}

let hasLock = false;
let lockId: ObjectId | null = null;

/**
 * Refreshes the conversation-stats lock when this process holds it, otherwise tries to
 * acquire it, then schedules itself again in 10 seconds.
 *
 * A failing database call is logged rather than propagated, and the next run is scheduled
 * in `finally`, so a transient error cannot stop lock maintenance.
 */
async function maintainLock() {
	try {
		if (hasLock && lockId) {
			hasLock = await refreshLock(Semaphores.CONVERSATION_STATS, lockId);

			if (!hasLock) {
				lockId = null;
			}
		} else if (!hasLock) {
			lockId = (await acquireLock(Semaphores.CONVERSATION_STATS)) || null;
			hasLock = !!lockId;
		}
	} catch (e) {
		logger.error(e, "Error maintaining the conversation stats lock");
	} finally {
		setTimeout(maintainLock, 10_000);
	}
}

/**
 * Starts lock maintenance, then computes stats once if they are more than a day old and
 * repeats that check every 24 hours.
 *
 * NOTE(review): `hasLock` is maintained but never consulted before computing; confirm
 * whether the computation was meant to run only on the instance holding the lock.
 */
export function refreshConversationStats() {
	const ONE_HOUR_MS = 3_600_000;

	// Errors are logged here because nothing awaits the timer callbacks
	const computeIfStale = async () => {
		try {
			if (await shouldComputeStats()) {
				void computeAllStats();
			}
		} catch (e) {
			logger.error(e, "Error checking whether conversation stats should be computed");
		}
	};

	maintainLock().then(async () => {
		await computeIfStale();

		setInterval(computeIfStale, 24 * ONE_HOUR_MS);
	});
}
================================================ FILE: src/lib/migrations/lock.ts ================================================
import { collections } from "$lib/server/database";
import { ObjectId } from "mongodb";
import type { Semaphores } from "$lib/types/Semaphore";

/**
 * Returns the lock id if the lock was acquired, false otherwise
 */
export async function acquireLock(key: Semaphores | string): Promise<ObjectId | false> {
	try {
		const id = new ObjectId();

		const insert = await collections.semaphores.insertOne({
			_id: id,
			key,
			createdAt: new Date(),
			updatedAt: new Date(),
			deleteAt: new Date(Date.now() + 1000 * 60 * 3), // 3 minutes
		});

		return insert.acknowledged ?
id : false; // true if the document was inserted
	} catch (e) {
		// unique index violation, so there must already be a lock
		return false;
	}
}

/**
 * Deletes the semaphore document matching both `lockId` and `key`.
 * A lock id that does not match the stored one leaves the semaphore untouched.
 */
export async function releaseLock(key: Semaphores | string, lockId: ObjectId) {
	await collections.semaphores.deleteOne({
		_id: lockId,
		key,
	});
}

/** Resolves to true when at least one semaphore document exists for `key`. */
export async function isDBLocked(key: Semaphores | string): Promise<boolean> {
	const res = await collections.semaphores.countDocuments({
		key,
	});
	return res > 0;
}

/**
 * Pushes the lock's `deleteAt` three minutes into the future and bumps `updatedAt`.
 *
 * @returns true when a semaphore with this `lockId` and `key` was found, false otherwise
 */
export async function refreshLock(key: Semaphores | string, lockId: ObjectId): Promise<boolean> {
	const result = await collections.semaphores.updateOne(
		{
			_id: lockId,
			key,
		},
		{
			$set: {
				updatedAt: new Date(),
				deleteAt: new Date(Date.now() + 1000 * 60 * 3), // 3 minutes
			},
		}
	);

	return result.matchedCount > 0;
}
================================================ FILE: src/lib/migrations/migrations.spec.ts ================================================
import { afterEach, assert, beforeAll, describe, expect, it } from "vitest";
import { migrations } from "./routines";
import { acquireLock, isDBLocked, refreshLock, releaseLock } from "./lock";
import { Semaphores } from "$lib/types/Semaphore";
import { collections, ready } from "$lib/server/database";

describe(
	"migrations",
	{
		retry: 3,
	},
	() => {
		beforeAll(async () => {
			await ready;
			try {
				await collections.semaphores.createIndex({ key: 1 }, { unique: true });
			} catch (e) {
				// Index might already exist, ignore error
			}
		}, 20000);

		it("should not have duplicates guid", async () => {
			const guids = migrations.map((m) => m._id.toString());
			const uniqueGuids = [...new Set(guids)];
			expect(uniqueGuids.length).toBe(guids.length);
		});

		it("should acquire only one lock on DB", async () => {
			const results = await Promise.all(
				new Array(1000).fill(0).map(() => acquireLock(Semaphores.TEST_MIGRATION))
			);
			const locks = results.filter((r) => r);
			const semaphores = await collections.semaphores.find({}).toArray();
			expect(locks.length).toBe(1);
			expect(semaphores).toBeDefined();
			expect(semaphores.length).toBe(1);
expect(semaphores?.[0].key).toBe(Semaphores.TEST_MIGRATION);
		});

		it("should read the lock correctly", async () => {
			const lockId = await acquireLock(Semaphores.TEST_MIGRATION);
			assert(lockId);
			expect(await isDBLocked(Semaphores.TEST_MIGRATION)).toBe(true);
			expect(!!(await acquireLock(Semaphores.TEST_MIGRATION))).toBe(false);
			await releaseLock(Semaphores.TEST_MIGRATION, lockId);
			expect(await isDBLocked(Semaphores.TEST_MIGRATION)).toBe(false);
		});

		it("should refresh the lock", async () => {
			const lockId = await acquireLock(Semaphores.TEST_MIGRATION);
			assert(lockId);

			// get the updatedAt time
			const updatedAtInitially = (await collections.semaphores.findOne({}))?.updatedAt;

			await refreshLock(Semaphores.TEST_MIGRATION, lockId);

			const updatedAtAfterRefresh = (await collections.semaphores.findOne({}))?.updatedAt;

			expect(updatedAtInitially).toBeDefined();
			expect(updatedAtAfterRefresh).toBeDefined();
			// NOTE(review): `toBe` compares by identity, so two separately fetched values pass
			// even when the timestamps are equal; comparing `getTime()` would check the refresh itself.
			expect(updatedAtInitially).not.toBe(updatedAtAfterRefresh);
		});

		afterEach(async () => {
			await collections.semaphores.deleteMany({});
			await collections.migrationResults.deleteMany({});
		});
	}
);
================================================ FILE: src/lib/migrations/migrations.ts ================================================
import { Database } from "$lib/server/database";
import { migrations } from "./routines";
import { acquireLock, releaseLock, isDBLocked, refreshLock } from "./lock";
import { Semaphores } from "$lib/types/Semaphore";
import { logger } from "$lib/server/logger";
import { config } from "$lib/server/config";

/**
 * Runs every migration that still needs to run, under the migration lock.
 *
 * When another instance holds the lock, this waits (polling once per second) until the lock
 * is gone and returns without running anything. Otherwise each migration runs inside a
 * transaction and its outcome is recorded in `migrationResults` as "success" or "failure".
 * A failing migration is logged and does not stop the remaining ones.
 *
 * The lock refresh timer is stopped and the lock released in `finally`, so an unexpected
 * error while recording results cannot leave the lock held or the timer running.
 *
 * @throws Error when two migrations share the same id
 */
export async function checkAndRunMigrations() {
	// make sure all GUIDs are unique
	if (new Set(migrations.map((m) => m._id.toString())).size !== migrations.length) {
		throw new Error("Duplicate migration GUIDs found.");
	}

	// check if all migrations have already been run
	const migrationResults = await (await Database.getInstance())
		.getCollections()
		.migrationResults.find()
		.toArray();

	// Any recorded result counts as "applied", whatever its status
	const appliedIds = new Set(migrationResults.map((m) => m._id.toString()));

	logger.debug("[MIGRATIONS] Begin check...");

	// connect to the database
	const connectedClient = await (await Database.getInstance()).getClient().connect();

	const lockId = await acquireLock(Semaphores.MIGRATION);

	if (!lockId) {
		// another instance already has the lock, so we exit early
		logger.debug(
			"[MIGRATIONS] Another instance already has the lock. Waiting for DB to be unlocked."
		);

		// Todo: is this necessary? Can we just return?
		// block until the lock is released
		while (await isDBLocked(Semaphores.MIGRATION)) {
			await new Promise((resolve) => setTimeout(resolve, 1000));
		}
		return;
	}

	// once here, we have the lock
	// make sure to refresh it regularly while it's running
	const refreshInterval = setInterval(async () => {
		try {
			await refreshLock(Semaphores.MIGRATION, lockId);
		} catch (e) {
			// Nothing awaits this callback, so a rejection here would otherwise go unhandled
			logger.error(e, "[MIGRATIONS] Failed to refresh the migration lock");
		}
	}, 1000 * 10);

	try {
		// iterate over all migrations
		for (const migration of migrations) {
			// check if the migration has already been applied
			const shouldRun = migration.runEveryTime || !appliedIds.has(migration._id.toString());

			if (!shouldRun) {
				logger.debug(`[MIGRATIONS] "${migration.name}" already applied. Skipping...`);
				continue;
			}

			// check the modifiers to see if some cases match
			if (
				(migration.runForHuggingChat === "only" && !config.isHuggingChat) ||
				(migration.runForHuggingChat === "never" && config.isHuggingChat)
			) {
				logger.debug(
					`[MIGRATIONS] "${migration.name}" should not be applied for this run. Skipping...`
				);
				continue;
			}

			// otherwise all is good and we can run the migration
			logger.debug(
				`[MIGRATIONS] "${migration.name}" ${
					migration.runEveryTime ? "should run every time" : "not applied yet"
				}. Applying...`
			);

			await (await Database.getInstance()).getCollections().migrationResults.updateOne(
				{ _id: migration._id },
				{
					$set: {
						name: migration.name,
						status: "ongoing",
					},
				},
				{ upsert: true }
			);

			const session = connectedClient.startSession();
			let result = false;

			try {
				await session.withTransaction(async () => {
					result = await migration.up(await Database.getInstance());
				});
			} catch (e) {
				logger.error(e, `[MIGRATIONS] "${migration.name}" failed!`);
			} finally {
				await session.endSession();
			}

			await (await Database.getInstance()).getCollections().migrationResults.updateOne(
				{ _id: migration._id },
				{
					$set: {
						name: migration.name,
						status: result ? "success" : "failure",
					},
				},
				{ upsert: true }
			);
		}

		logger.debug("[MIGRATIONS] All migrations applied. Releasing lock");
	} finally {
		clearInterval(refreshInterval);
		await releaseLock(Semaphores.MIGRATION, lockId);
	}
}
================================================ FILE: src/lib/migrations/routines/01-update-search-assistants.ts ================================================
import type { Migration } from ".";
import { collections } from "$lib/server/database";
import { ObjectId, type AnyBulkWriteOperation } from "mongodb";
import type { Assistant } from "$lib/types/Assistant";
import { generateSearchTokens } from "$lib/utils/searchTokens";

// Recomputes `searchTokens` from each assistant's name, writing in unordered batches of 1000.
const migration: Migration = {
	_id: new ObjectId("5f9f3e3e3e3e3e3e3e3e3e3e"),
	name: "Update search assistants",
	up: async () => {
		const { assistants } = collections;
		let ops: AnyBulkWriteOperation<Assistant>[] = [];

		// Only `_id` and `name` are needed to build the update
		for await (const assistant of assistants
			.find()
			.project<Pick<Assistant, "_id" | "name">>({ _id: 1, name: 1 })) {
			ops.push({
				updateOne: {
					filter: {
						_id: assistant._id,
					},
					update: {
						$set: {
							searchTokens: generateSearchTokens(assistant.name),
						},
					},
				},
			});

			if (ops.length >= 1000) {
				process.stdout.write(".");
				await assistants.bulkWrite(ops, { ordered: false });
				ops = [];
			}
		}

		// Flush the last, partial batch
		if (ops.length) {
			await assistants.bulkWrite(ops, { ordered: false });
		}

		return true;
	},
	down: async () => {
		const { assistants } = collections;
		await
assistants.updateMany({}, { $unset: { searchTokens: "" } });
		return true;
	},
};

export default migration;
================================================ FILE: src/lib/migrations/routines/02-update-assistants-models.ts ================================================
import type { Migration } from ".";
import { collections } from "$lib/server/database";
import { ObjectId } from "mongodb";

// Points every assistant whose model is not in the current model list at a replacement:
// the old model's `transferTo` target when that target is a current model, otherwise the
// first model of the list.
const updateAssistantsModels: Migration = {
	_id: new ObjectId("5f9f3f3f3f3f3f3f3f3f3f3f"),
	name: "Update deprecated models in assistants with the default model",
	up: async () => {
		const models = (await import("$lib/server/models")).models;
		//@ts-expect-error the property doesn't exist anymore, keeping the script for reference
		const oldModels = (await import("$lib/server/models")).oldModels;

		const currentIds = models.map((model) => model.id);
		const fallbackId = models[0].id;

		// Build one update per assistant whose modelId is not a current model
		const updates = await collections.assistants
			.find({ modelId: { $nin: currentIds } })
			.map((assistant) => {
				const previous = oldModels.find(
					(m: (typeof models)[number]) => m.id === assistant.modelId
				);
				const target =
					previous && previous.transferTo && models.some((m) => m.id === previous.transferTo)
						? previous.transferTo
						: fallbackId;

				return {
					updateOne: {
						filter: { _id: assistant._id },
						update: { $set: { modelId: target } },
					},
				};
			})
			.toArray();

		if (updates.length > 0) {
			await collections.assistants.bulkWrite(updates);
		}

		return true;
	},
	runEveryTime: true,
	runForHuggingChat: "only",
};

export default updateAssistantsModels;
================================================ FILE: src/lib/migrations/routines/04-update-message-updates.ts ================================================
import type { Migration } from ".";
import { collections } from "$lib/server/database";
import { ObjectId, type WithId } from "mongodb";
import type { Conversation } from "$lib/types/Conversation";
import {
MessageUpdateStatus,
	MessageUpdateType,
	type MessageUpdate,
} from "$lib/types/MessageUpdate";
import type { Message } from "$lib/types/Message";
// isMessageWebSearchSourcesUpdate removed from utils; use inline predicate

// -----------
// Copy of the previous message update types

export type FinalAnswer = {
	type: "finalAnswer";
	text: string;
};

export type TextStreamUpdate = {
	type: "stream";
	token: string;
};

type WebSearchUpdate = {
	type: "webSearch";
	messageType: "update" | "error" | "sources";
	message: string;
	args?: string[];
	sources?: { title?: string; link: string }[];
};

type StatusUpdate = {
	type: "status";
	status: "started" | "pending" | "finished" | "error" | "title";
	message?: string;
};

type ErrorUpdate = {
	type: "error";
	message: string;
	name: string;
};

type FileUpdate = {
	type: "file";
	sha: string;
};

type OldMessageUpdate =
	| FinalAnswer
	| TextStreamUpdate
	| WebSearchUpdate
	| StatusUpdate
	| ErrorUpdate
	| FileUpdate;

/**
 * Converts the old message update to the new schema
 *
 * @param message - the message owning the update; only its `interrupted` flag is read
 * @param update - an update in the previous schema
 * @returns the converted update, or null when the update is dropped (pending status,
 *   web search, unknown type, or a conversion error)
 */
function convertMessageUpdate(message: Message, update: OldMessageUpdate): MessageUpdate | null {
	try {
		switch (update.type) {
			// Text and files
			case "finalAnswer":
				return {
					type: MessageUpdateType.FinalAnswer,
					text: update.text,
					interrupted: message.interrupted ?? false,
				};
			case "stream":
				return {
					type: MessageUpdateType.Stream,
					token: update.token,
				};
			case "file":
				return {
					type: MessageUpdateType.File,
					name: "Unknown",
					sha: update.sha,
					// assume jpeg but could be any image. should be harmless
					mime: "image/jpeg",
				};

			// Status
			case "status": {
				if (update.status === "title") {
					return {
						type: MessageUpdateType.Title,
						title: update.message ?? "New Chat",
					};
				}
				if (update.status === "pending") return null;

				// Anything other than started/finished is stored as an error status
				let status = MessageUpdateStatus.Error;
				if (update.status === "started") {
					status = MessageUpdateStatus.Started;
				} else if (update.status === "finished") {
					status = MessageUpdateStatus.Finished;
				}
				return {
					type: MessageUpdateType.Status,
					status,
					message: update.message,
				};
			}
			case "error":
				// Treat it as an error status update
				return {
					type: MessageUpdateType.Status,
					status: MessageUpdateStatus.Error,
					message: update.message,
				};

			// Web Search
			case "webSearch":
				return null; // Web search updates are no longer supported

			default:
				console.warn("Unknown message update during migration:", update);
				return null;
		}
	} catch (error) {
		console.error("Error converting message update during migration. Skipping it... Error:", error);
		return null;
	}
}

// Rewrites the `messages` array of every conversation with converted updates.
const updateMessageUpdates: Migration = {
	_id: new ObjectId("5f9f7f7f7f7f7f7f7f7f7f7f"),
	name: "Convert message updates to the new schema",
	up: async () => {
		const allConversations = collections.conversations.find({});

		let conversation: WithId<Pick<Conversation, "messages">> | null = null;
		while ((conversation = await allConversations.tryNext())) {
			const messages = conversation.messages.map((message) => {
				// Convert all of the existing updates to the new schema
				const updates = message.updates
					?.map((update) => convertMessageUpdate(message, update as OldMessageUpdate))
					.filter((update): update is MessageUpdate => Boolean(update));

				return { ...message, updates };
			});

			// Set the new messages array
			await collections.conversations.updateOne({ _id: conversation._id }, { $set: { messages } });
		}

		return true;
	},
	runEveryTime: false,
};

export default updateMessageUpdates;
================================================ FILE: src/lib/migrations/routines/05-update-message-files.ts ================================================
import { ObjectId, type WithId } from "mongodb";
import { collections } from "$lib/server/database";
import type { Migration } from ".";
import type { Conversation } from "$lib/types/Conversation";
import type { MessageFile } from "$lib/types/Message";

// Replaces string entries in each message's `files` with file objects; entries that are
// already objects are kept as they are.
const updateMessageFiles: Migration = {
	_id: new ObjectId("5f9f5f5f5f5f5f5f5f5f5f5f"),
	name: "Convert message files to the new schema",
	up: async () => {
		const allConversations = collections.conversations.find({}, { projection: { messages: 1 } });

		let conversation: WithId<Pick<Conversation, "messages">> | null = null;
		while ((conversation = await allConversations.tryNext())) {
			const messages = conversation.messages.map((message) => {
				const files = (message.files as string[] | undefined)?.map<MessageFile>((file) => {
					// File is already in the new format
					if (typeof file !== "string") return file;

					// File was a hash pointing to a file in the bucket
					if (file.length === 64) {
						return {
							type: "hash",
							name: "unknown.jpg",
							value: file,
							mime: "image/jpeg",
						};
					}

					// File was a base64 string
					return {
						type: "base64",
						name: "unknown.jpg",
						value: file,
						mime: "image/jpeg",
					};
				});

				return {
					...message,
					files,
				};
			});

			// Set the new messages array
			await collections.conversations.updateOne({ _id: conversation._id }, { $set: { messages } });
		}

		return true;
	},
	runEveryTime: false,
};

export default updateMessageFiles;
================================================ FILE: src/lib/migrations/routines/06-trim-message-updates.ts ================================================
import type { Migration } from ".";
import { collections } from "$lib/server/database";
import { ObjectId, type WithId } from "mongodb";
import type { Conversation } from "$lib/types/Conversation";
import type { Message } from "$lib/types/Message";
import type { MessageUpdate } from "$lib/types/MessageUpdate";
import { logger } from "$lib/server/logger";

// -----------

/**
 * Drops legacy web search updates and passes every other update through unchanged.
 *
 * @param message - the message owning the update (not read here)
 * @param update - a stored update of unknown shape
 * @returns null for an object whose `type` is "webSearch", otherwise the update itself
 */
function convertMessageUpdate(message: Message, update: unknown): MessageUpdate | null {
	try {
		// Trim legacy web search updates entirely
		const isLegacyWebSearch =
			typeof update === "object" &&
			update !== null &&
			(update as { type: string }).type === "webSearch";

		return isLegacyWebSearch ? null : (update as MessageUpdate);
	} catch (error) {
		logger.error(error, "Error converting message update during migration. Skipping it..");
		return null;
	}
}

// Rewrites the `messages` array of every conversation without web search updates.
const trimMessageUpdates: Migration = {
	_id: new ObjectId("000000000000000000000006"),
	name: "Trim message updates to reduce stored size",
	up: async () => {
		const allConversations = collections.conversations.find({});

		let conversation: WithId<Pick<Conversation, "messages">> | null = null;
		while ((conversation = await allConversations.tryNext())) {
			const messages = conversation.messages.map((message) => {
				// Convert all of the existing updates to the new schema
				const updates = message.updates
					?.map((update) => convertMessageUpdate(message, update))
					.filter((update): update is MessageUpdate => Boolean(update));

				return { ...message, updates };
			});

			// Set the new messages array
			await collections.conversations.updateOne({ _id: conversation._id }, { $set: { messages } });
		}

		return true;
	},
	runEveryTime: false,
};

export default trimMessageUpdates;
================================================ FILE: src/lib/migrations/routines/08-update-featured-to-review.ts ================================================
import type { Migration } from ".";
import { collections } from "$lib/server/database";
import { ObjectId } from "mongodb";
import { ReviewStatus } from "$lib/types/Review";

// Replaces the boolean `featured` flag with a `review` status on assistants and tools:
// featured documents become APPROVED, all others PRIVATE, then `featured` is removed.
const updateFeaturedToReview: Migration = {
	_id: new ObjectId("000000000000000000000008"),
	name: "Update featured to review",
	up: async () => {
		const { assistants, tools } = collections;

		// Update assistants
		await assistants.updateMany({ featured: true }, { $set: { review: ReviewStatus.APPROVED } });
		await assistants.updateMany(
			{ featured: { $ne: true } },
			{ $set: { review: ReviewStatus.PRIVATE } }
		);
		await assistants.updateMany({}, { $unset: { featured: "" } });

		// Update tools
		await tools.updateMany({ featured: true }, { $set: { review: ReviewStatus.APPROVED } });
		await tools.updateMany({ featured: { $ne: true } }, { $set: { review: ReviewStatus.PRIVATE } });
		await tools.updateMany({}, { $unset: { featured: "" } });

		return true;
	},
	runEveryTime: false,
};

export default
updateFeaturedToReview; ================================================ FILE: src/lib/migrations/routines/09-delete-empty-conversations.spec.ts ================================================ import type { Session } from "$lib/types/Session"; import type { User } from "$lib/types/User"; import type { Conversation } from "$lib/types/Conversation"; import { ObjectId } from "mongodb"; import { deleteConversations } from "./09-delete-empty-conversations"; import { afterAll, afterEach, beforeAll, describe, expect, test } from "vitest"; import { collections, ready } from "$lib/server/database"; type Message = Conversation["messages"][number]; const userData = { _id: new ObjectId(), createdAt: new Date(), updatedAt: new Date(), username: "new-username", name: "name", avatarUrl: "https://example.com/avatar.png", hfUserId: "9999999999", } satisfies User; Object.freeze(userData); const sessionForUser = { _id: new ObjectId(), createdAt: new Date(), updatedAt: new Date(), userId: userData._id, sessionId: "session-id-9999999999", expiresAt: new Date(Date.now() + 1000 * 60 * 60 * 24), } satisfies Session; Object.freeze(sessionForUser); const userMessage = { from: "user", id: "user-message-id", content: "Hello, how are you?", } satisfies Message; const assistantMessage = { from: "assistant", id: "assistant-message-id", content: "I'm fine, thank you!", } satisfies Message; const systemMessage = { from: "system", id: "system-message-id", content: "This is a system message", } satisfies Message; const conversationBase = { _id: new ObjectId(), createdAt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000), updatedAt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000), model: "model-id", title: "title", messages: [], } satisfies Conversation; describe.sequential("Deleting discarded conversations", async () => { test("a conversation with no messages should get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, }); 
const result = await deleteConversations(collections); expect(result).toBe(1); }); test("a conversation with no messages that is less than 1 hour old should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, createdAt: new Date(Date.now() - 30 * 60 * 1000), }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with only system messages should get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, messages: [systemMessage], }); const result = await deleteConversations(collections); expect(result).toBe(1); }); test("a conversation with a user message should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, messages: [userMessage], }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with an assistant message should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, messages: [assistantMessage], }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with a mix of messages should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, sessionId: sessionForUser.sessionId, messages: [systemMessage, userMessage, assistantMessage, userMessage, assistantMessage], }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with a userId and no sessionId should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, messages: [userMessage, assistantMessage], userId: userData._id, }); const result = await deleteConversations(collections); expect(result).toBe(0); }); 
test("a conversation with no userId or sessionId should get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, messages: [userMessage, assistantMessage], }); const result = await deleteConversations(collections); expect(result).toBe(1); }); test("a conversation with a sessionId that exists should not get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, messages: [userMessage, assistantMessage], sessionId: sessionForUser.sessionId, }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with a userId and a sessionId that doesn't exist should NOT get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, userId: userData._id, messages: [userMessage, assistantMessage], sessionId: new ObjectId().toString(), }); const result = await deleteConversations(collections); expect(result).toBe(0); }); test("a conversation with only a sessionId that doesn't exist, should get deleted", async () => { await collections.conversations.insertOne({ ...conversationBase, messages: [userMessage, assistantMessage], sessionId: new ObjectId().toString(), }); const result = await deleteConversations(collections); expect(result).toBe(1); }); test("many conversations should get deleted", async () => { const conversations = Array.from({ length: 10010 }, () => ({ ...conversationBase, _id: new ObjectId(), })); await collections.conversations.insertMany(conversations); const result = await deleteConversations(collections); expect(result).toBe(10010); }); beforeAll(async () => { await ready; await collections.users.insertOne(userData); await collections.sessions.insertOne(sessionForUser); }, 20000); afterAll(async () => { await collections.users.deleteOne({ _id: userData._id, }); await collections.sessions.deleteOne({ _id: sessionForUser._id, }); await collections.conversations.deleteMany({}); }); afterEach(async () => { await 
/**
 * Delete the conversations whose `_id` appears in `ids`.
 *
 * @param conversations - Collection to delete from.
 * @param ids - Ids of the documents to remove; an empty list short-circuits without a query.
 * @returns The number of documents the server reports as deleted.
 */
async function deleteBatch(conversations: Collection<Conversation>, ids: ObjectId[]) {
	if (ids.length === 0) return 0;

	const { deletedCount } = await conversations.deleteMany({ _id: { $in: ids } });
	return deletedCount;
}

/**
 * Drain `cursor`, handing its documents to `processBatchFn` in chunks of at most `BATCH_SIZE`.
 * A final, smaller chunk is flushed once the cursor is exhausted.
 *
 * @param cursor - Cursor to consume. It is always closed before this function settles.
 * @param processBatchFn - Called sequentially with each chunk; a rejection aborts the drain.
 */
async function processCursor<T>(
	cursor: FindCursor<T>,
	processBatchFn: (batch: T[]) => Promise<void>
) {
	let batch: T[] = [];

	try {
		for await (const doc of cursor) {
			batch.push(doc);

			if (batch.length >= BATCH_SIZE) {
				await processBatchFn(batch);
				batch = [];
			}
		}

		if (batch.length > 0) {
			await processBatchFn(batch);
		}
	} finally {
		// Release the server-side cursor even when a batch callback throws midway.
		await cursor.close();
	}
}

/**
 * Remove discarded conversations in two passes:
 *  1. conversations older than `DELETE_THRESHOLD_MS` that contain no user or assistant message;
 *  2. conversations without a `userId` whose `sessionId` is missing or matches no stored session.
 *
 * @param collections - Database collections; only `conversations` and `sessions` are used.
 * @returns Total number of conversations deleted across both passes.
 */
export async function deleteConversations(
	collections: typeof import("$lib/server/database").collections
) {
	let deleteCount = 0;
	const { conversations, sessions } = collections;

	// Only `_id` and `sessionId` are read below, so avoid transferring full message histories.
	const projection = { _id: 1, sessionId: 1 };

	// First criteria: Delete conversations with no user/assistant messages older than 1 hour
	const emptyConvCursor = conversations
		.find(
			{
				"messages.from": { $not: { $in: ["user", "assistant"] } },
				createdAt: { $lt: new Date(Date.now() - DELETE_THRESHOLD_MS) },
			},
			{ projection }
		)
		.batchSize(BATCH_SIZE);

	await processCursor(emptyConvCursor, async (batch) => {
		const ids = batch.map((doc) => doc._id);
		deleteCount += await deleteBatch(conversations, ids);
	});

	// Second criteria: Process conversations without users in batches and check sessions
	const noUserCursor = conversations
		.find({ userId: { $exists: false } }, { projection })
		.batchSize(BATCH_SIZE);

	await processCursor(noUserCursor, async (batch) => {
		const sessionIds = [
			...new Set(batch.map((conv) => conv.sessionId).filter((id): id is string => !!id)),
		];

		// Nothing to look up when no conversation in the batch carries a session id.
		const existingSessions =
			sessionIds.length > 0
				? await sessions
						.find({ sessionId: { $in: sessionIds } }, { projection: { _id: 0, sessionId: 1 } })
						.toArray()
				: [];
		const validSessionIds = new Set(existingSessions.map((s) => s.sessionId));

		const idsToDelete = batch
			.filter((conv) => !conv.sessionId || !validSessionIds.has(conv.sessionId))
			.map((conv) => conv._id);

		deleteCount += await deleteBatch(conversations, idsToDelete);
	});

	logger.info(`[MIGRATIONS] Deleted ${deleteCount} conversations in total.`);
	return deleteCount;
}
"only" | "never"; // leave unspecified to run for both runForHuggingChat?: "only" | "never"; // leave unspecified to run for both runEveryTime?: boolean; } export const migrations: Migration[] = []; ================================================ FILE: src/lib/server/__tests__/conversation-stop-generating.spec.ts ================================================ import { afterEach, describe, expect, it, vi } from "vitest"; import { ObjectId } from "mongodb"; import { collections } from "$lib/server/database"; import { AbortRegistry } from "$lib/server/abortRegistry"; import { cleanupTestData, createTestConversation, createTestLocals, createTestUser, } from "$lib/server/api/__tests__/testHelpers"; import { POST } from "../../../routes/conversation/[id]/stop-generating/+server"; describe.sequential("POST /conversation/[id]/stop-generating", () => { afterEach(async () => { vi.restoreAllMocks(); await cleanupTestData(); }); it( "creates abort marker and aborts active registry controllers", { timeout: 30000 }, async () => { const { locals } = await createTestUser(); const conversation = await createTestConversation(locals); const abortSpy = vi.spyOn(AbortRegistry.getInstance(), "abort"); const response = await POST({ params: { id: conversation._id.toString() }, locals, } as never); expect(response.status).toBe(200); expect(abortSpy).toHaveBeenCalledWith(conversation._id.toString()); const marker = await collections.abortedGenerations.findOne({ conversationId: conversation._id, }); expect(marker).not.toBeNull(); expect(marker?.createdAt).toBeInstanceOf(Date); expect(marker?.updatedAt).toBeInstanceOf(Date); } ); it("updates updatedAt while preserving createdAt on repeated stop", async () => { const { locals } = await createTestUser(); const conversation = await createTestConversation(locals); await POST({ params: { id: conversation._id.toString() }, locals, } as never); const firstMarker = await collections.abortedGenerations.findOne({ conversationId: conversation._id, 
/**
 * Tracks active upstream generation requests so they can be cancelled on demand.
 * Multiple controllers can be registered per conversation (for threaded/background runs).
 *
 * Keys are always normalized with `toString()` so that `register`, `abort` and
 * `unregister` agree on the map key for the same conversation.
 */
export class AbortRegistry {
	private static instance: AbortRegistry;

	/** Conversation id (string form) -> controllers of the generations currently running for it. */
	private controllers = new Map<string, Set<AbortController>>();

	/** Return the process-wide registry, creating it on first use. */
	public static getInstance(): AbortRegistry {
		if (!AbortRegistry.instance) {
			AbortRegistry.instance = new AbortRegistry();
		}
		return AbortRegistry.instance;
	}

	/**
	 * Start tracking `controller` for `conversationId`. The controller removes itself
	 * from the registry as soon as it is aborted.
	 */
	public register(conversationId: string, controller: AbortController) {
		// An already-aborted signal never fires "abort" again, so it could never
		// unregister itself; there is also nothing left to cancel.
		if (controller.signal.aborted) return;

		const key = conversationId.toString();
		let set = this.controllers.get(key);
		if (!set) {
			set = new Set();
			this.controllers.set(key, set);
		}
		set.add(controller);

		controller.signal.addEventListener(
			"abort",
			() => {
				this.unregister(key, controller);
			},
			{ once: true }
		);
	}

	/** Abort every controller registered for `conversationId` and forget them. */
	public abort(conversationId: string) {
		const key = conversationId.toString();
		const set = this.controllers.get(key);
		if (!set?.size) return;

		logger.debug({ conversationId: key }, "Aborting active generation via AbortRegistry");

		// Detach the entry first and iterate a snapshot: each `abort()` synchronously
		// runs the listener installed in `register`, which would otherwise mutate the
		// set while it is being walked.
		this.controllers.delete(key);
		for (const controller of [...set]) {
			if (!controller.signal.aborted) {
				controller.abort();
			}
		}
	}

	/** Stop tracking `controller`; drops the conversation entry once it is empty. */
	public unregister(conversationId: string, controller: AbortController) {
		const key = conversationId.toString();
		const set = this.controllers.get(key);
		if (!set) return;

		set.delete(controller);
		if (set.size === 0) {
			this.controllers.delete(key);
		}
	}
}
/**
 * Manages the one-time admin login token and the set of sessions that have
 * been promoted to admin with it.
 */
class AdminTokenManager {
	// Fixed token from config when provided, otherwise a random one.
	private token = config.ADMIN_TOKEN || v4();
	// contains all session ids that are currently admin sessions
	private adminSessions: Array<Session["sessionId"]> = [];

	/** Whether admin CLI login is turned on (`ADMIN_CLI_LOGIN === "true"`). */
	public get enabled() {
		return config.ADMIN_CLI_LOGIN === "true";
	}

	/** True when the feature is enabled and `sessionId` was promoted via `checkToken`. */
	public isAdmin(sessionId: Session["sessionId"]) {
		if (!this.enabled) return false;
		return this.adminSessions.includes(sessionId);
	}

	/**
	 * Validate `token`; on success mark `sessionId` as admin and rotate the token
	 * (the rotation is a no-op when a fixed `ADMIN_TOKEN` is configured).
	 *
	 * @returns true if the token matched, false otherwise or when the feature is disabled.
	 */
	public checkToken(token: string, sessionId: Session["sessionId"]) {
		if (!this.enabled) return false;

		// NOTE(review): plain `===` on a secret is not constant-time; consider
		// `crypto.timingSafeEqual` if this endpoint is reachable by untrusted clients.
		if (token !== this.token) return false;

		logger.info(`[ADMIN] Token validated`);
		// Avoid accumulating duplicates when the same session logs in repeatedly.
		if (!this.adminSessions.includes(sessionId)) {
			this.adminSessions.push(sessionId);
		}
		this.token = config.ADMIN_TOKEN || v4();
		return true;
	}

	/** Revoke admin rights for `sessionId`. */
	public removeSession(sessionId: Session["sessionId"]) {
		this.adminSessions = this.adminSessions.filter((id) => id !== sessionId);
	}

	/** Log a login URL containing the current token, unless a fixed token is configured. */
	public displayToken() {
		// if admin token is set, don't display it
		if (!this.enabled || config.ADMIN_TOKEN) return;

		// Port precedence: PORT env var, then `--port <n>` CLI argument.
		let port = process.env.PORT
			? Number.parseInt(process.env.PORT, 10)
			: process.argv.includes("--port")
				? Number.parseInt(process.argv[process.argv.indexOf("--port") + 1], 10)
				: undefined;

		// Missing or unparsable (NaN) port: fall back to the default of the run mode.
		if (!port) {
			const mode = process.argv.find((arg) => arg === "preview" || arg === "dev");
			if (mode === "preview") {
				port = 4173;
			} else if (mode === "dev") {
				port = 5173;
			} else {
				port = 3000;
			}
		}

		const url = (config.PUBLIC_ORIGIN || `http://localhost:${port}`) + "?token=";
		logger.info(`[ADMIN] You can login with ${url + this.token}`);
	}
}
describe.sequential("DELETE /api/v2/conversations/[id]", () => {
	/**
	 * Assert that `run` fails with an error carrying the given HTTP `status`.
	 * `Promise.resolve().then(run)` captures both synchronous throws and rejections.
	 * Unlike `expect.fail()` inside a try/catch, a handler that unexpectedly succeeds
	 * is reported as "resolved instead of rejecting" rather than as a bogus status mismatch.
	 */
	const expectHttpError = (run: () => unknown, status: number) =>
		expect(Promise.resolve().then(run)).rejects.toMatchObject({ status });

	afterEach(async () => {
		await cleanupTestData();
	});

	it("removes owned conversation", async () => {
		const { locals } = await createTestUser();
		const conv = await createTestConversation(locals, { title: "To Delete" });

		const res = await DELETE({
			locals,
			params: { id: conv._id.toString() },
		} as never);

		expect(res.status).toBe(200);
		const data = await parseResponse<{ success: boolean }>(res);
		expect(data.success).toBe(true);

		// The document must be gone from the collection, not just reported as deleted.
		const found = await collections.conversations.findOne({ _id: conv._id });
		expect(found).toBeNull();
	});

	it("throws 404 for non-existent conversation", async () => {
		const { locals } = await createTestUser();
		const fakeId = new ObjectId().toString();

		await expectHttpError(
			() =>
				DELETE({
					locals,
					params: { id: fakeId },
				} as never),
			404
		);
	});

	it("throws 401 for unauthenticated request", async () => {
		const locals = createTestLocals({ sessionId: undefined, user: undefined });

		await expectHttpError(
			() =>
				DELETE({
					locals,
					params: { id: new ObjectId().toString() },
				} as never),
			401
		);
	});
});
collections.conversations.findOne({ _id: conv._id }); expect(updated?.title).toBe("hiddenVisible Title"); }); it("rejects empty title", async () => { const { locals } = await createTestUser(); const conv = await createTestConversation(locals, { title: "Original" }); try { await PATCH({ locals, params: { id: conv._id.toString() }, request: new Request("http://localhost", { method: "PATCH", body: JSON.stringify({ title: "" }), headers: { "Content-Type": "application/json" }, }), } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(400); } }); it("rejects title longer than 100 characters", async () => { const { locals } = await createTestUser(); const conv = await createTestConversation(locals, { title: "Original" }); const longTitle = "a".repeat(101); try { await PATCH({ locals, params: { id: conv._id.toString() }, request: new Request("http://localhost", { method: "PATCH", body: JSON.stringify({ title: longTitle }), headers: { "Content-Type": "application/json" }, }), } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(400); } }); it("throws 404 for non-existent conversation", async () => { const { locals } = await createTestUser(); const fakeId = new ObjectId().toString(); try { await PATCH({ locals, params: { id: fakeId }, request: new Request("http://localhost", { method: "PATCH", body: JSON.stringify({ title: "New Title" }), headers: { "Content-Type": "application/json" }, }), } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(404); } }); it("throws 401 for unauthenticated request", async () => { const locals = createTestLocals({ sessionId: undefined, user: undefined }); try { await PATCH({ locals, params: { id: new ObjectId().toString() }, request: new Request("http://localhost", { method: "PATCH", body: JSON.stringify({ title: "New Title" }), headers: { "Content-Type": 
/**
 * Build a small message tree with fresh ids:
 *
 *   root (system)
 *     -> msg1 (user)
 *          -> msg2 (assistant)
 *               -> msg3 (user)
 *     -> unrelated (user)   -- sibling branch hanging off root
 *
 * Returns the five messages (root, msg1, msg2, msg3, unrelated, in that order)
 * together with each message's id.
 */
function buildMessageTree(): {
	messages: Message[];
	rootId: string;
	msg1Id: string;
	msg2Id: string;
	msg3Id: string;
	unrelatedId: string;
} {
	// Small factory so each node reads as one line: id, author, text, ancestors, children.
	const node = (
		id: string,
		from: Message["from"],
		content: string,
		ancestors: string[],
		children: string[]
	): Message => ({ id, from, content, ancestors, children });

	const rootId = v4();
	const msg1Id = v4();
	const msg2Id = v4();
	const msg3Id = v4();
	const unrelatedId = v4();

	const messages: Message[] = [
		node(rootId, "system", "System prompt", [], [msg1Id, unrelatedId]),
		node(msg1Id, "user", "Hello", [rootId], [msg2Id]),
		node(msg2Id, "assistant", "Hi there!", [rootId, msg1Id], [msg3Id]),
		node(msg3Id, "user", "How are you?", [rootId, msg1Id, msg2Id], []),
		node(unrelatedId, "user", "Unrelated branch", [rootId], []),
	];

	return { messages, rootId, msg1Id, msg2Id, msg3Id, unrelatedId };
}
expect(rootMsg?.children).toContain(tree.unrelatedId); }); it("throws 404 for non-existent message", async () => { const { locals } = await createTestUser(); const tree = buildMessageTree(); const conv = await createTestConversation(locals, { messages: tree.messages, rootMessageId: tree.rootId, }); const fakeMessageId = v4(); try { await DELETE({ locals, params: { id: conv._id.toString(), messageId: fakeMessageId }, } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(404); } }); it("throws 401 for unauthenticated request", async () => { const locals = createTestLocals({ sessionId: undefined, user: undefined }); try { await DELETE({ locals, params: { id: new ObjectId().toString(), messageId: v4() }, } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(401); } }); it("preserves unrelated messages in the tree", async () => { const { locals } = await createTestUser(); const tree = buildMessageTree(); const conv = await createTestConversation(locals, { messages: tree.messages, rootMessageId: tree.rootId, }); // Delete msg3 (a leaf) -> should only remove msg3, everything else stays const res = await DELETE({ locals, params: { id: conv._id.toString(), messageId: tree.msg3Id }, } as never); expect(res.status).toBe(200); const updated = await collections.conversations.findOne({ _id: conv._id }); const remainingIds = (updated?.messages ?? 
/**
 * Build the URL of the conversations list endpoint used by these tests.
 *
 * @param params - Optional query parameters; each entry is applied with `searchParams.set`.
 * @returns A `URL` for `http://localhost:5173/api/v2/conversations` carrying those parameters.
 */
function mockUrl(params?: Record<string, string>): URL {
	const target = new URL("http://localhost:5173/api/v2/conversations");

	Object.entries(params ?? {}).forEach(([name, value]) => {
		target.searchParams.set(name, value);
	});

	return target;
}
expect(data.conversations[0]._id.toString()).toBe(conv._id.toString()); expect(data.hasMore).toBe(false); }); it("returns empty array for user with no conversations", async () => { const { locals } = await createTestUser(); const res = await GET({ locals, url: mockUrl(), } as never); expect(res.status).toBe(200); const data = await parseResponse<{ conversations: unknown[]; hasMore: boolean }>(res); expect(data.conversations).toHaveLength(0); expect(data.hasMore).toBe(false); }); it("supports pagination with p=0 and p=1", async () => { const { locals } = await createTestUser(); // Create CONV_NUM_PER_PAGE + 5 conversations with distinct updatedAt values for (let i = 0; i < CONV_NUM_PER_PAGE + 5; i++) { await createTestConversation(locals, { title: `Conv ${i}`, updatedAt: new Date(Date.now() - (CONV_NUM_PER_PAGE + 5 - i) * 1000), }); } const resPage0 = await GET({ locals, url: mockUrl({ p: "0" }), } as never); const dataPage0 = await parseResponse<{ conversations: Array<{ title: string }>; hasMore: boolean; }>(resPage0); expect(dataPage0.conversations).toHaveLength(CONV_NUM_PER_PAGE); expect(dataPage0.hasMore).toBe(true); const resPage1 = await GET({ locals, url: mockUrl({ p: "1" }), } as never); const dataPage1 = await parseResponse<{ conversations: Array<{ title: string }>; hasMore: boolean; }>(resPage1); expect(dataPage1.conversations).toHaveLength(5); expect(dataPage1.hasMore).toBe(false); }); it("returns hasMore=true when more than CONV_NUM_PER_PAGE exist", async () => { const { locals } = await createTestUser(); for (let i = 0; i < CONV_NUM_PER_PAGE + 1; i++) { await createTestConversation(locals, { title: `Conv ${i}`, updatedAt: new Date(Date.now() - i * 1000), }); } const res = await GET({ locals, url: mockUrl(), } as never); const data = await parseResponse<{ conversations: unknown[]; hasMore: boolean }>(res); expect(data.conversations).toHaveLength(CONV_NUM_PER_PAGE); expect(data.hasMore).toBe(true); }); it("sorts by updatedAt descending", async () => { 
const { locals } = await createTestUser(); await createTestConversation(locals, { title: "Oldest", updatedAt: new Date("2024-01-01"), }); await createTestConversation(locals, { title: "Newest", updatedAt: new Date("2024-06-01"), }); await createTestConversation(locals, { title: "Middle", updatedAt: new Date("2024-03-01"), }); const res = await GET({ locals, url: mockUrl(), } as never); const data = await parseResponse<{ conversations: Array<{ title: string }> }>(res); expect(data.conversations[0].title).toBe("Newest"); expect(data.conversations[1].title).toBe("Middle"); expect(data.conversations[2].title).toBe("Oldest"); }); it("throws 401 for unauthenticated request", async () => { const locals = createTestLocals({ sessionId: undefined, user: undefined }); try { await GET({ locals, url: mockUrl(), } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(401); } }); it("does not return other users' conversations", async () => { const { locals: localsA } = await createTestUser(); const { locals: localsB } = await createTestUser(); await createTestConversation(localsA, { title: "User A Chat" }); await createTestConversation(localsB, { title: "User B Chat" }); const res = await GET({ locals: localsA, url: mockUrl(), } as never); const data = await parseResponse<{ conversations: Array<{ title: string }> }>(res); expect(data.conversations).toHaveLength(1); expect(data.conversations[0].title).toBe("User A Chat"); }); }); describe.sequential("DELETE /api/v2/conversations", () => { afterEach(async () => { await cleanupTestData(); }); it("removes all conversations for authenticated user", async () => { const { locals } = await createTestUser(); await createTestConversation(locals, { title: "Chat 1" }); await createTestConversation(locals, { title: "Chat 2" }); await createTestConversation(locals, { title: "Chat 3" }); const res = await DELETE({ locals } as never); expect(res.status).toBe(200); const data = await 
parseResponse(res); expect(data).toBe(3); const remaining = await collections.conversations.countDocuments(); expect(remaining).toBe(0); }); it("throws 401 for unauthenticated request", async () => { const locals = createTestLocals({ sessionId: undefined, user: undefined }); try { await DELETE({ locals } as never); expect.fail("Should have thrown"); } catch (e: unknown) { expect((e as { status: number }).status).toBe(401); } }); it("does not remove other users' conversations", async () => { const { locals: localsA } = await createTestUser(); const { locals: localsB } = await createTestUser(); await createTestConversation(localsA, { title: "User A Chat" }); await createTestConversation(localsB, { title: "User B Chat" }); const res = await DELETE({ locals: localsA } as never); const data = await parseResponse(res); expect(data).toBe(1); const remaining = await collections.conversations.countDocuments(); expect(remaining).toBe(1); const userBConvs = await collections.conversations .find({ userId: localsB.user?._id }) .toArray(); expect(userBConvs).toHaveLength(1); expect(userBConvs[0].title).toBe("User B Chat"); }); }); ================================================ FILE: src/lib/server/api/__tests__/misc.spec.ts ================================================ import { describe, it, expect, beforeEach } from "vitest"; import superjson from "superjson"; import { createTestLocals, createTestUser, cleanupTestData } from "./testHelpers"; import { GET as featureFlagsGET } from "../../../../routes/api/v2/feature-flags/+server"; import { GET as publicConfigGET } from "../../../../routes/api/v2/public-config/+server"; import type { FeatureFlags } from "$lib/server/api/types"; async function parseResponse(res: Response): Promise { return superjson.parse(await res.text()) as T; } function mockRequestEvent(locals: App.Locals) { return { locals, url: new URL("http://localhost"), request: new Request("http://localhost"), } as Parameters[0]; } describe("GET 
/api/v2/feature-flags", () => { beforeEach(async () => { await cleanupTestData(); }, 20000); it("returns correct shape with expected fields", async () => { const locals = createTestLocals(); const res = await featureFlagsGET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data).toHaveProperty("enableAssistants"); expect(data).toHaveProperty("loginEnabled"); expect(data).toHaveProperty("isAdmin"); expect(data).toHaveProperty("transcriptionEnabled"); expect(typeof data.enableAssistants).toBe("boolean"); expect(typeof data.loginEnabled).toBe("boolean"); expect(typeof data.isAdmin).toBe("boolean"); expect(typeof data.transcriptionEnabled).toBe("boolean"); }); it("reflects isAdmin from locals for non-admin user", async () => { const locals = createTestLocals({ isAdmin: false }); const res = await featureFlagsGET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data.isAdmin).toBe(false); }); it("reflects isAdmin from locals for admin user", async () => { const { locals } = await createTestUser(); locals.isAdmin = true; const res = await featureFlagsGET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data.isAdmin).toBe(true); }); }); describe("GET /api/v2/public-config", () => { it("returns an object", async () => { const locals = createTestLocals(); const res = await publicConfigGET(mockRequestEvent(locals)); const data = await parseResponse>(res); expect(data).toBeDefined(); expect(typeof data).toBe("object"); expect(data).not.toBeNull(); }); }); ================================================ FILE: src/lib/server/api/__tests__/testHelpers.ts ================================================ import { ObjectId } from "mongodb"; import { collections } from "$lib/server/database"; import type { User } from "$lib/types/User"; import type { Session } from "$lib/types/Session"; import type { Conversation } from "$lib/types/Conversation"; export function createTestLocals(overrides?: Partial): App.Locals 
{ return { sessionId: "test-session-id", isAdmin: false, user: undefined, token: undefined, ...overrides, }; } export async function createTestUser(): Promise<{ user: User; session: Session; locals: App.Locals; }> { const userId = new ObjectId(); const sessionId = `test-session-${userId.toString()}`; const user: User = { _id: userId, createdAt: new Date(), updatedAt: new Date(), username: `user-${userId.toString().slice(0, 8)}`, name: "Test User", avatarUrl: "https://example.com/avatar.png", hfUserId: `hf-${userId.toString()}`, }; const session: Session = { _id: new ObjectId(), createdAt: new Date(), updatedAt: new Date(), userId, sessionId, expiresAt: new Date(Date.now() + 1000 * 60 * 60 * 24), }; await collections.users.insertOne(user); await collections.sessions.insertOne(session); return { user, session, locals: { user, sessionId, isAdmin: false, token: undefined, }, }; } export async function createTestConversation( locals: App.Locals, overrides?: Partial ): Promise { const conv: Conversation = { _id: new ObjectId(), title: "Test Conversation", model: "test-model", messages: [], createdAt: new Date(), updatedAt: new Date(), ...(locals.user ? 
{ userId: locals.user._id } : { sessionId: locals.sessionId }), ...overrides, }; await collections.conversations.insertOne(conv); return conv; } export async function cleanupTestData() { await collections.conversations.deleteMany({}); await collections.abortedGenerations.deleteMany({}); await collections.users.deleteMany({}); await collections.sessions.deleteMany({}); await collections.settings.deleteMany({}); await collections.sharedConversations.deleteMany({}); await collections.reports.deleteMany({}); } ================================================ FILE: src/lib/server/api/__tests__/user-reports.spec.ts ================================================ import { describe, it, expect, beforeEach } from "vitest"; import { ObjectId } from "mongodb"; import superjson from "superjson"; import { collections } from "$lib/server/database"; import { createTestLocals, createTestUser, cleanupTestData } from "./testHelpers"; import { GET } from "../../../../routes/api/v2/user/reports/+server"; import type { Report } from "$lib/types/Report"; async function parseResponse(res: Response): Promise { return superjson.parse(await res.text()) as T; } function mockRequestEvent(locals: App.Locals) { return { locals, url: new URL("http://localhost"), request: new Request("http://localhost"), } as Parameters[0]; } describe("GET /api/v2/user/reports", () => { beforeEach(async () => { await cleanupTestData(); }, 20000); it("returns empty array for unauthenticated user", async () => { const locals = createTestLocals(); const res = await GET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data).toEqual([]); }); it("returns reports for authenticated user", async () => { const { user, locals } = await createTestUser(); const report1: Report = { _id: new ObjectId(), createdBy: user._id, object: "assistant", contentId: new ObjectId(), reason: "Inappropriate content", createdAt: new Date(), updatedAt: new Date(), }; const report2: Report = { _id: new ObjectId(), 
createdBy: user._id, object: "tool", contentId: new ObjectId(), reason: "Broken tool", createdAt: new Date(), updatedAt: new Date(), }; await collections.reports.insertMany([report1, report2]); const res = await GET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data).toHaveLength(2); expect(data[0]._id.toString()).toBe(report1._id.toString()); expect(data[1]._id.toString()).toBe(report2._id.toString()); expect(data[0].reason).toBe("Inappropriate content"); expect(data[1].reason).toBe("Broken tool"); }); it("returns empty array when authenticated user has no reports", async () => { const { locals } = await createTestUser(); const res = await GET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data).toEqual([]); }); }); ================================================ FILE: src/lib/server/api/__tests__/user.spec.ts ================================================ import { describe, it, expect, beforeEach } from "vitest"; import superjson from "superjson"; import { collections } from "$lib/server/database"; import { createTestLocals, createTestUser, cleanupTestData } from "./testHelpers"; import { GET as userGET } from "../../../../routes/api/v2/user/+server"; import { GET as settingsGET, POST as settingsPOST, } from "../../../../routes/api/v2/user/settings/+server"; async function parseResponse(res: Response): Promise { return superjson.parse(await res.text()) as T; } function mockRequestEvent(locals: App.Locals, overrides?: Record) { return { locals, url: new URL("http://localhost"), request: new Request("http://localhost"), ...overrides, } as Parameters[0]; } describe("GET /api/v2/user", () => { beforeEach(async () => { await cleanupTestData(); }, 20000); it("returns user info for authenticated user", async () => { const { user, locals } = await createTestUser(); const res = await userGET(mockRequestEvent(locals)); const data = await parseResponse>(res); expect(data).not.toBeNull(); expect(data).toMatchObject({ 
id: user._id.toString(), username: user.username, avatarUrl: user.avatarUrl, isAdmin: false, isEarlyAccess: false, }); }); it("returns null for unauthenticated user", async () => { const locals = createTestLocals(); const res = await userGET(mockRequestEvent(locals)); const data = await parseResponse(res); expect(data).toBeNull(); }); }); describe("GET /api/v2/user/settings", () => { beforeEach(async () => { await cleanupTestData(); }, 20000); it("returns default settings when none exist", async () => { const { locals } = await createTestUser(); const res = await settingsGET(mockRequestEvent(locals)); const data = await parseResponse>(res); expect(data).toMatchObject({ welcomeModalSeen: false, welcomeModalSeenAt: null, streamingMode: "smooth", directPaste: false, shareConversationsWithModelAuthors: true, customPrompts: {}, multimodalOverrides: {}, toolsOverrides: {}, providerOverrides: {}, }); }); it("returns stored settings with canonical streaming mode", async () => { const { user, locals } = await createTestUser(); await collections.settings.insertOne({ userId: user._id, shareConversationsWithModelAuthors: false, activeModel: "custom-model", streamingMode: "raw", directPaste: true, hapticsEnabled: true, customPrompts: { "my-model": "Be helpful" }, multimodalOverrides: {}, toolsOverrides: {}, hidePromptExamples: {}, providerOverrides: {}, welcomeModalSeenAt: new Date("2024-01-01"), createdAt: new Date(), updatedAt: new Date(), }); const res = await settingsGET(mockRequestEvent(locals)); const data = await parseResponse>(res); expect(data).toMatchObject({ welcomeModalSeen: true, shareConversationsWithModelAuthors: false, streamingMode: "raw", directPaste: true, customPrompts: { "my-model": "Be helpful" }, }); }); it("maps legacy stored streamingMode=final to smooth", async () => { const { user, locals } = await createTestUser(); const legacySettingsWithFinal = { userId: user._id, shareConversationsWithModelAuthors: true, activeModel: "custom-model", streamingMode: 
"final", directPaste: false, customPrompts: {}, multimodalOverrides: {}, toolsOverrides: {}, hidePromptExamples: {}, providerOverrides: {}, createdAt: new Date(), updatedAt: new Date(), }; await collections.settings.insertOne( legacySettingsWithFinal as unknown as Parameters[0] ); const res = await settingsGET(mockRequestEvent(locals)); const data = await parseResponse>(res); expect(data).toMatchObject({ streamingMode: "smooth", }); }); }); describe("POST /api/v2/user/settings", () => { beforeEach(async () => { await cleanupTestData(); }, 20000); it("creates settings with upsert", async () => { const { user, locals } = await createTestUser(); const body = { shareConversationsWithModelAuthors: false, activeModel: "test-model", customPrompts: {}, multimodalOverrides: {}, toolsOverrides: {}, providerOverrides: {}, streamingMode: "raw", directPaste: false, hidePromptExamples: {}, }; const res = await settingsPOST( mockRequestEvent(locals, { request: new Request("http://localhost", { method: "POST", body: JSON.stringify(body), headers: { "Content-Type": "application/json" }, }), }) ); expect(res.status).toBe(200); const stored = await collections.settings.findOne({ userId: user._id }); expect(stored).not.toBeNull(); expect(stored?.shareConversationsWithModelAuthors).toBe(false); expect(stored?.streamingMode).toBe("raw"); expect(stored?.createdAt).toBeInstanceOf(Date); expect(stored?.updatedAt).toBeInstanceOf(Date); }); it("sets welcomeModalSeenAt when welcomeModalSeen is true", async () => { const { user, locals } = await createTestUser(); const body = { welcomeModalSeen: true, shareConversationsWithModelAuthors: true, activeModel: "test-model", customPrompts: {}, multimodalOverrides: {}, toolsOverrides: {}, providerOverrides: {}, streamingMode: "smooth", directPaste: false, hidePromptExamples: {}, }; await settingsPOST( mockRequestEvent(locals, { request: new Request("http://localhost", { method: "POST", body: JSON.stringify(body), headers: { "Content-Type": 
"application/json" }, }), }) ); const stored = await collections.settings.findOne({ userId: user._id }); expect(stored).not.toBeNull(); expect(stored?.welcomeModalSeenAt).toBeInstanceOf(Date); }); it("validates body with Zod and applies defaults for missing fields", async () => { const { user, locals } = await createTestUser(); // POST with minimal body — Zod defaults should fill in the rest const body = {}; const res = await settingsPOST( mockRequestEvent(locals, { request: new Request("http://localhost", { method: "POST", body: JSON.stringify(body), headers: { "Content-Type": "application/json" }, }), }) ); expect(res.status).toBe(200); const stored = await collections.settings.findOne({ userId: user._id }); expect(stored).not.toBeNull(); // Zod defaults should be applied expect(stored?.shareConversationsWithModelAuthors).toBe(true); expect(stored?.streamingMode).toBe("smooth"); expect(stored?.directPaste).toBe(false); expect(stored?.customPrompts).toEqual({}); }); }); ================================================ FILE: src/lib/server/api/types.ts ================================================ import type { BackendModel } from "$lib/server/models"; export type GETModelsResponse = Array<{ id: string; name: string; websiteUrl?: string; modelUrl?: string; datasetName?: string; datasetUrl?: string; displayName: string; description?: string; logoUrl?: string; providers?: Array<{ provider: string } & Record>; promptExamples?: { title: string; prompt: string }[]; parameters: BackendModel["parameters"]; preprompt?: string; multimodal: boolean; multimodalAcceptedMimetypes?: string[]; supportsTools?: boolean; unlisted: boolean; hasInferenceAPI: boolean; isRouter: boolean; }>; export type GETOldModelsResponse = Array<{ id: string; name: string; displayName: string; transferTo?: string; }>; export interface FeatureFlags { enableAssistants: boolean; loginEnabled: boolean; isAdmin: boolean; transcriptionEnabled: boolean; } ================================================ 
FILE: src/lib/server/api/utils/requireAuth.ts
================================================
import { error } from "@sveltejs/kit";

/**
 * Guard for endpoints that need an identified caller.
 * Throws 401 unless locals carries either a user `_id` or a `sessionId`.
 */
export function requireAuth(locals: App.Locals): void {
	const hasUserId = Boolean(locals.user?._id);
	const hasSession = Boolean(locals.sessionId);
	if (hasUserId || hasSession) {
		return;
	}
	error(401, "Must have a valid session or user");
}

/**
 * Guard for admin-only endpoints.
 * Throws 401 when locals has neither a user nor a sessionId, otherwise 403 when
 * `locals.isAdmin` is falsy.
 */
export function requireAdmin(locals: App.Locals): void {
	const isIdentified = Boolean(locals.user) || Boolean(locals.sessionId);
	if (!isIdentified) {
		error(401, "Unauthorized");
	}

	const isAdmin = Boolean(locals.isAdmin);
	if (!isAdmin) {
		error(403, "Admin privileges required");
	}
}

================================================
FILE: src/lib/server/api/utils/resolveConversation.ts
================================================
import { collections } from "$lib/server/database";
import { ObjectId } from "mongodb";
import { authCondition } from "$lib/server/auth";
import { convertLegacyConversation } from "$lib/utils/tree/convertLegacyConversation";
import { error } from "@sveltejs/kit";

/**
 * Resolve a conversation by ID.
 * - 7-char IDs → shared conversation lookup
 * - ObjectId strings → owned conversation lookup with auth check
 *
 * Returns the conversation with legacy fields converted and a `shared` flag.
*/ export async function resolveConversation( id: string, locals: App.Locals, fromShare?: string | null ) { let conversation; let shared = false; if (id.length === 7) { // shared link of length 7 conversation = await collections.sharedConversations.findOne({ _id: id, }); shared = true; if (!conversation) { error(404, "Conversation not found"); } } else { try { new ObjectId(id); } catch { error(400, "Invalid conversation ID format"); } conversation = await collections.conversations.findOne({ _id: new ObjectId(id), ...authCondition(locals), }); if (!conversation) { const conversationExists = (await collections.conversations.countDocuments({ _id: new ObjectId(id), })) !== 0; if (conversationExists) { error( 403, "You don't have access to this conversation. If someone gave you this link, ask them to use the 'share' feature instead." ); } error(404, "Conversation not found."); } if (fromShare && conversation.meta?.fromShareId === fromShare) { shared = true; } } return { ...conversation, ...convertLegacyConversation(conversation), shared, }; } ================================================ FILE: src/lib/server/api/utils/resolveModel.ts ================================================ import { error } from "@sveltejs/kit"; /** * Resolve a model by namespace and optional model name. * Looks up in the models registry and returns the model, or throws 404 if not found or unlisted. 
*/
export async function resolveModel(namespace: string, model?: string) {
	// Model ids are either "<namespace>" or "<namespace>/<model>".
	let modelId = namespace;
	if (model) {
		modelId += "/" + model;
	}

	try {
		// Loaded lazily; a failure to import the registry is reported as a 500 below.
		const { models } = await import("$lib/server/models");
		const found = models.find((m) => m.id === modelId);
		if (!found || found.unlisted) {
			error(404, "Model not found");
		}
		return found;
	} catch (e) {
		// Re-throw SvelteKit HttpErrors
		if (e && typeof e === "object" && "status" in e) {
			throw e;
		}
		error(500, "Models not available");
	}
}

================================================
FILE: src/lib/server/api/utils/superjsonResponse.ts
================================================
import superjson from "superjson";

/**
 * Create a JSON response serialized with superjson.
 * Matches the wire format of the former Elysia `mapResponse` hook.
 *
 * @param data - Value to serialize with `superjson.stringify`.
 * @param init - Optional `ResponseInit`; its headers may be a plain object, an array of
 *   `[name, value]` pairs or a `Headers` instance. A caller-supplied Content-Type
 *   (in any letter case) wins over the `application/json` default.
 * @returns The `Response` carrying the serialized body.
 */
export function superjsonResponse(data: unknown, init?: ResponseInit): Response {
	// Normalise through `Headers`: object-spreading a `Headers` instance or a tuple array
	// would silently drop (or mangle) the caller's headers.
	const headers = new Headers(init?.headers);
	if (!headers.has("Content-Type")) {
		headers.set("Content-Type", "application/json");
	}

	return new Response(superjson.stringify(data), {
		...init,
		headers,
	});
}

================================================
FILE: src/lib/server/apiToken.ts
================================================
import { config } from "$lib/server/config";

/**
 * Pick the token used for upstream API calls.
 * When `USE_USER_TOKEN` is "true" the caller's own token is required (throws if absent);
 * otherwise falls back to `OPENAI_API_KEY`, then `HF_TOKEN`.
 */
export function getApiToken(locals: App.Locals | undefined) {
	if (config.USE_USER_TOKEN === "true") {
		if (!locals?.token) {
			throw new Error("User token not found");
		}
		return locals.token;
	}
	return config.OPENAI_API_KEY || config.HF_TOKEN;
}

================================================
FILE: src/lib/server/auth.ts
================================================
import {
	Issuer,
	type BaseClient,
	type UserinfoResponse,
	type TokenSet,
	custom,
	generators,
} from "openid-client";
import type { RequestEvent } from "@sveltejs/kit";
import { addHours, addWeeks, differenceInMinutes, subMinutes } from "date-fns";
import { config } from "$lib/server/config";
import { sha256 } from "$lib/utils/sha256";
import { z } from "zod";
import { dev } from "$app/environment";
import { redirect, type Cookies } from
"@sveltejs/kit";
import { collections } from "$lib/server/database";
import JSON5 from "json5";
import { logger } from "$lib/server/logger";
import { ObjectId } from "mongodb";
import { adminTokenManager } from "./adminToken";
import type { User } from "$lib/types/User";
import type { Session } from "$lib/types/Session";
import { base } from "$app/paths";
import { acquireLock, isDBLocked, releaseLock } from "$lib/migrations/lock";
import { Semaphores } from "$lib/types/Semaphore";

/** Per-request OIDC settings: the redirect URI registered for the login callback. */
export interface OIDCSettings {
	redirectURI: string;
}

/** Result of a completed OIDC code exchange: the token set plus the userinfo payload. */
export interface OIDCUserInfo {
	token: TokenSet;
	userData: UserinfoResponse;
}

// String schema that falls back to `value` both when the key is missing (`.default`)
// and when it is present but empty (the transform).
const stringWithDefault = (value: string) =>
	z
		.string()
		.default(value)
		.transform((el) => (el ? el : value));

// OIDC configuration: parsed from the JSON5 in `OPENID_CONFIG` (treated as `{}` when unset),
// with each field falling back to the matching `OPENID_*` config value.
export const OIDConfig = z
	.object({
		CLIENT_ID: stringWithDefault(config.OPENID_CLIENT_ID),
		CLIENT_SECRET: stringWithDefault(config.OPENID_CLIENT_SECRET),
		PROVIDER_URL: stringWithDefault(config.OPENID_PROVIDER_URL),
		SCOPES: stringWithDefault(config.OPENID_SCOPES),
		NAME_CLAIM: stringWithDefault(config.OPENID_NAME_CLAIM).refine(
			(el) => !["preferred_username", "email", "picture", "sub"].includes(el),
			{ message: "nameClaim cannot be one of the restricted keys." }
		),
		TOLERANCE: stringWithDefault(config.OPENID_TOLERANCE),
		RESOURCE: stringWithDefault(config.OPENID_RESOURCE),
		ID_TOKEN_SIGNED_RESPONSE_ALG: z.string().optional(),
	})
	.parse(JSON5.parse(config.OPENID_CONFIG || "{}"));

// Login is considered enabled as soon as a non-empty client id is configured.
export const loginEnabled = !!OIDConfig.CLIENT_ID;

// Cookie SameSite attribute: `COOKIE_SAMESITE` when set, otherwise "lax" in dev or when
// insecure cookies are allowed, and "none" in every other case.
const sameSite = z
	.enum(["lax", "none", "strict"])
	.default(dev || config.ALLOW_INSECURE_COOKIES === "true" ? "lax" : "none")
	.parse(config.COOKIE_SAMESITE === "" ? undefined : config.COOKIE_SAMESITE);

// Cookie Secure attribute: `COOKIE_SECURE` when set, otherwise secure unless running in dev
// or with insecure cookies allowed.
const secure = z
	.boolean()
	.default(!(dev || config.ALLOW_INSECURE_COOKIES === "true"))
	.parse(config.COOKIE_SECURE === "" ?
undefined : config.COOKIE_SECURE === "true");

/**
 * Accept only in-app absolute paths as post-login return targets.
 *
 * @param path - Candidate return path (may be missing).
 * @returns `path` unchanged when it is a safe absolute path, otherwise `undefined`.
 */
function sanitizeReturnPath(path: string | undefined | null): string | undefined {
	if (!path || !path.startsWith("/")) {
		return undefined;
	}
	// URL parsers strip ASCII tab/newline characters and, for http(s) URLs, treat "\" like "/",
	// so "/\host" and "/<tab>/host" resolve exactly like the protocol-relative "//host".
	const effectivePath = path.replace(/[\t\n\r]/g, "");
	if (effectivePath.startsWith("//") || effectivePath.startsWith("/\\")) {
		return undefined;
	}
	return path;
}

/**
 * (Re)issue the session cookie with a fresh two-week expiry.
 */
export function refreshSessionCookie(cookies: Cookies, sessionId: string) {
	cookies.set(config.COOKIE_NAME, sessionId, {
		path: "/",
		// So that it works inside the space's iframe
		sameSite,
		secure,
		httpOnly: true,
		expires: addWeeks(new Date(), 2),
	});
}

/**
 * Look up the user attached to a session, refreshing its OAuth token when it is about to expire.
 *
 * @param sessionId - Hashed session id stored in the sessions collection.
 * @param coupledCookieHash - When provided, must match the hash stored on the session.
 * @param url - Current request URL, forwarded to the OIDC client factory.
 * @returns The user (or null), whether the caller must discard the session, and the session's
 *   current OAuth data when available.
 */
export async function findUser(
	sessionId: string,
	coupledCookieHash: string | undefined,
	url: URL
): Promise<{
	user: User | null;
	invalidateSession: boolean;
	oauth?: Session["oauth"];
}> {
	const session = await collections.sessions.findOne({ sessionId });

	if (!session) {
		return { user: null, invalidateSession: false };
	}

	if (coupledCookieHash && session.coupledCookieHash !== coupledCookieHash) {
		return { user: null, invalidateSession: true };
	}

	// Check if OAuth token needs refresh
	if (session.oauth?.token && session.oauth.refreshToken) {
		// If token expires in less than 5 minutes, refresh it
		if (differenceInMinutes(session.oauth.token.expiresAt, new Date()) < 5) {
			const lockKey = `${Semaphores.OAUTH_TOKEN_REFRESH}:${sessionId}`;

			// Acquire lock for token refresh
			const lockId = await acquireLock(lockKey);
			if (lockId) {
				try {
					// Attempt to refresh the token
					const newTokenSet = await refreshOAuthToken(
						{ redirectURI: `${config.PUBLIC_ORIGIN}${base}/login/callback` },
						session.oauth.refreshToken,
						url
					);

					if (!newTokenSet || !newTokenSet.access_token) {
						// Token refresh failed, invalidate session
						return { user: null, invalidateSession: true };
					}

					// Update session with new token information
					const updatedOAuth = tokenSetToSessionOauth(newTokenSet);

					if (!updatedOAuth) {
						// Token refresh failed, invalidate session
						return { user: null, invalidateSession: true };
					}

					await collections.sessions.updateOne(
						{ sessionId },
						{
							$set: {
								oauth: updatedOAuth,
								updatedAt: new Date(),
							},
						}
					);

					session.oauth = updatedOAuth;
				} catch (err) {
					logger.error(err, "Error during token refresh:");
					return { user: null, invalidateSession: true };
				} finally {
					await releaseLock(lockKey, lockId);
				}
			} else if (new Date() > session.oauth.token.expiresAt) {
				// If the token has expired, we need to wait for the token refresh to complete
				let attempts = 0;
				do {
					await new Promise((resolve) => setTimeout(resolve, 200));
					attempts++;
					if (attempts > 20) {
						return { user: null, invalidateSession: true };
					}
				} while (await isDBLocked(lockKey));

				const updatedSession = await collections.sessions.findOne({ sessionId });

				// Compare token values: the two sessions are separate query results, so comparing
				// the token objects by identity could never detect an unchanged (still expired) token.
				if (
					!updatedSession ||
					updatedSession.oauth?.token?.value === session.oauth.token.value
				) {
					return { user: null, invalidateSession: true };
				}

				session.oauth = updatedSession.oauth;
			}
		}
	} else if (session.oauth?.token && !session.oauth.refreshToken) {
		// Without a refresh token an expired access token cannot be renewed.
		if (new Date() > session.oauth.token.expiresAt) {
			return { user: null, invalidateSession: true };
		}
	}

	return {
		user: await collections.users.findOne({ _id: session.userId }),
		invalidateSession: false,
		oauth: session.oauth,
	};
}

/**
 * Build the database filter that restricts a query to the caller's own documents:
 * by `userId` for logged-in users, otherwise by `sessionId` on documents without a `userId`.
 * Throws when locals has neither a user nor a sessionId.
 */
export const authCondition = (locals: App.Locals) => {
	if (!locals.user && !locals.sessionId) {
		throw new Error("User or sessionId is required");
	}

	return locals.user
		? { userId: locals.user._id }
		: { sessionId: locals.sessionId, userId: { $exists: false } };
};

/**
 * Convert an OIDC token set into the shape stored on the session.
 * Returns `undefined` when the set has no access token. The stored expiry is one minute
 * before `expires_at` (given in seconds), or two weeks from now when `expires_at` is absent.
 */
export function tokenSetToSessionOauth(tokenSet: TokenSet): Session["oauth"] {
	if (!tokenSet.access_token) {
		return undefined;
	}

	return {
		token: {
			value: tokenSet.access_token,
			expiresAt: tokenSet.expires_at
				? subMinutes(new Date(tokenSet.expires_at * 1000), 1)
				: addWeeks(new Date(), 2),
		},
		refreshToken: tokenSet.refresh_token || undefined,
	};
}

/**
 * Generates a CSRF token using the user sessionId. Note that we don't need a secret because sessionId is enough.
*/
export async function generateCsrfToken(
	sessionId: string,
	redirectUrl: string,
	next?: string
): Promise {
	const safeNext = sanitizeReturnPath(next);

	// Key order (expiration, redirectUrl, next) is part of the signed serialization.
	const payload: {
		expiration: number;
		redirectUrl: string;
		next?: string;
	} = {
		expiration: addHours(new Date(), 1).getTime(),
		redirectUrl,
	};
	if (safeNext) {
		payload.next = safeNext;
	}

	// The signature binds the serialized payload to the session id.
	const signature = await sha256(JSON.stringify(payload) + "##" + sessionId);
	const envelope = JSON.stringify({ data: payload, signature });
	return Buffer.from(envelope).toString("base64");
}

// Cached result of OIDC discovery and the time it was fetched.
let lastIssuer: Issuer | null = null;
let lastIssuerFetchedAt: Date | null = null;

/**
 * Build an OIDC client for the configured provider, re-running discovery when the cached
 * issuer is at least 10 minutes old.
 */
async function getOIDCClient(settings: OIDCSettings, url: URL): Promise {
	const cacheIsStale =
		lastIssuer !== null &&
		lastIssuerFetchedAt !== null &&
		differenceInMinutes(new Date(), lastIssuerFetchedAt) >= 10;
	if (cacheIsStale) {
		lastIssuer = null;
		lastIssuerFetchedAt = null;
	}

	if (lastIssuer === null) {
		lastIssuer = await Issuer.discover(OIDConfig.PROVIDER_URL);
		lastIssuerFetchedAt = new Date();
	}
	const issuer = lastIssuer;

	// See https://datatracker.ietf.org/doc/draft-ietf-oauth-client-id-metadata-document/
	const clientId =
		OIDConfig.CLIENT_ID === "__CIMD__"
			? new URL(`${base}/.well-known/oauth-cimd`, config.PUBLIC_ORIGIN || url.origin).toString()
			: OIDConfig.CLIENT_ID;

	const clientMetadata: ConstructorParameters[0] = {
		client_id: clientId,
		client_secret: OIDConfig.CLIENT_SECRET,
		redirect_uris: [settings.redirectURI],
		response_types: ["code"],
		[custom.clock_tolerance]: OIDConfig.TOLERANCE || undefined,
		id_token_signed_response_alg: OIDConfig.ID_TOKEN_SIGNED_RESPONSE_ALG || undefined,
	};

	// When no algorithm is configured, fall back to the first one the provider advertises.
	const supportedAlgs = issuer.metadata["id_token_signing_alg_values_supported"];
	if (Array.isArray(supportedAlgs)) {
		clientMetadata.id_token_signed_response_alg ??= supportedAlgs[0];
	}

	return new issuer.Client(clientMetadata);
}

/**
 * Build the provider authorization URL for a login attempt.
 * Stores the PKCE code verifier in the `hfChat-codeVerifier` cookie (one-hour expiry) and
 * passes the CSRF token as the OAuth `state`.
 */
export async function getOIDCAuthorizationUrl(
	settings: OIDCSettings,
	params: { sessionId: string; next?: string; url: URL; cookies: Cookies }
): Promise {
	const { sessionId, next, url, cookies } = params;

	const client = await getOIDCClient(settings, url);
	const state = await generateCsrfToken(
		sessionId,
		settings.redirectURI,
		sanitizeReturnPath(next)
	);

	const codeVerifier = generators.codeVerifier();
	const codeChallenge = generators.codeChallenge(codeVerifier);

	cookies.set("hfChat-codeVerifier", codeVerifier, {
		path: "/",
		sameSite,
		secure,
		httpOnly: true,
		expires: addHours(new Date(), 1),
	});

	return client.authorizationUrl({
		code_challenge_method: "S256",
		code_challenge: codeChallenge,
		scope: OIDConfig.SCOPES,
		state,
		resource: OIDConfig.RESOURCE || undefined,
	});
}

/**
 * Exchange the authorization code for tokens, then fetch the userinfo payload.
 */
export async function getOIDCUserData(
	settings: OIDCSettings,
	code: string,
	codeVerifier: string,
	iss: string | undefined,
	url: URL
): Promise {
	const client = await getOIDCClient(settings, url);
	const callbackParams = { code, iss };
	const token = await client.callback(settings.redirectURI, callbackParams, {
		code_verifier: codeVerifier,
	});
	const userData = await client.userinfo(token);

	return { token, userData };
}

/**
 * Refreshes an OAuth token using the refresh token
 */
export async function refreshOAuthToken(
	settings: OIDCSettings,
	refreshToken: string,
	url: URL
): Promise {
	const client = await getOIDCClient(settings, url);
	return await client.refresh(refreshToken);
}

export async function validateAndParseCsrfToken(
	token: string,
	sessionId: string
): Promise<{
	/** This is the redirect url that was passed to the OIDC provider */
	redirectUrl: string;
	/** Relative path (within this app) to return to after login */
	next?: string;
} | null> {
	try {
		const csrfTokenSchema = z.object({
			data: z.object({
				expiration: z.number().int(),
				redirectUrl: z.string().url(),
				next: z.string().optional(),
			}),
			signature: z.string().length(64),
		});
		const { data, signature } = csrfTokenSchema.parse(JSON.parse(token));

		// Recompute the signature over the parsed payload for the current session.
		const expectedSignature = await sha256(JSON.stringify(data) + "##" + sessionId);

		const notExpired = data.expiration > Date.now();
		if (notExpired && signature === expectedSignature) {
			return { redirectUrl: data.redirectUrl, next: sanitizeReturnPath(data.next) };
		}
	} catch (e) {
		logger.error(e, "Error validating and parsing CSRF
token");
	}
	return null;
}

// Local aliases for the SvelteKit `Cookies` type and the global `Headers` type.
type CookieRecord = Cookies;
type HeaderRecord = Headers;

/**
 * Hash of the cookie a session is coupled with.
 * Returns `undefined` when `COUPLE_SESSION_WITH_COOKIE_NAME` is not configured, the literal
 * "no-cookie" when that cookie is absent, and the sha256 of its value otherwise.
 */
export async function getCoupledCookieHash(cookie: CookieRecord): Promise {
	if (!config.COUPLE_SESSION_WITH_COOKIE_NAME) {
		return undefined;
	}

	const cookieValue = cookie.get(config.COUPLE_SESSION_WITH_COOKIE_NAME);

	if (!cookieValue) {
		return "no-cookie";
	}

	return await sha256(cookieValue);
}

/**
 * Resolve the caller's identity for a request. Sources are tried in this order:
 * 1. the trusted e-mail header (when `TRUSTED_EMAIL_HEADER` is configured and the header is set),
 * 2. the session cookie,
 * 3. an `Authorization: Bearer` token (only when `isApi` is truthy),
 * 4. otherwise a brand-new anonymous session.
 *
 * The returned object carries `sessionId` and `secretSessionId`, plus `user`, `token` and
 * `isAdmin` as applicable to the branch taken.
 */
export async function authenticateRequest(
	headers: HeaderRecord,
	cookie: CookieRecord,
	url: URL,
	isApi?: boolean
): Promise {
	const token = cookie.get(config.COOKIE_NAME);

	let email = null;
	if (config.TRUSTED_EMAIL_HEADER) {
		email = headers.get(config.TRUSTED_EMAIL_HEADER);
	}

	let secretSessionId: string | null = null;
	let sessionId: string | null = null;

	if (email) {
		// Trusted-header login: the user object is built in place from the e-mail;
		// its _id is derived from the first 24 characters of the e-mail hash.
		secretSessionId = sessionId = await sha256(email);
		return {
			user: {
				_id: new ObjectId(sessionId.slice(0, 24)),
				name: email,
				email,
				createdAt: new Date(),
				updatedAt: new Date(),
				hfUserId: email,
				avatarUrl: "",
			},
			sessionId,
			secretSessionId,
			isAdmin: adminTokenManager.isAdmin(sessionId),
		};
	}

	if (token) {
		// Cookie login: the cookie holds the secret; the stored session id is its hash.
		secretSessionId = token;
		sessionId = await sha256(token);

		const result = await findUser(sessionId, await getCoupledCookieHash(cookie), url);

		if (result.invalidateSession) {
			// The old session must be dropped: mint a fresh random session instead.
			secretSessionId = crypto.randomUUID();
			sessionId = await sha256(secretSessionId);

			if (await collections.sessions.findOne({ sessionId })) {
				throw new Error("Session ID collision");
			}
		}

		return {
			user: result.user ?? undefined,
			token: result.oauth?.token?.value,
			sessionId,
			secretSessionId,
			isAdmin: result.user?.isAdmin || adminTokenManager.isAdmin(sessionId),
		};
	}

	if (isApi) {
		const authorization = headers.get("Authorization");
		if (authorization?.startsWith("Bearer ")) {
			// "Bearer " is 7 characters long.
			const token = authorization.slice(7);
			const hash = await sha256(token);
			sessionId = secretSessionId = hash;

			// Known token hash: resolve the user without calling the whoami endpoint.
			const cacheHit = await collections.tokenCaches.findOne({ tokenHash: hash });
			if (cacheHit) {
				const user = await collections.users.findOne({ hfUserId: cacheHit.userId });
				if (!user) {
					throw new Error("User not found");
				}
				return {
					user,
					sessionId,
					token,
					secretSessionId,
					isAdmin: user.isAdmin || adminTokenManager.isAdmin(sessionId),
				};
			}

			// Unknown token: ask the whoami endpoint who it belongs to.
			const response = await fetch("https://huggingface.co/api/whoami-v2", {
				headers: { Authorization: `Bearer ${token}` },
			});

			if (!response.ok) {
				throw new Error("Unauthorized");
			}

			const data = await response.json();
			const user = await collections.users.findOne({ hfUserId: data.id });
			if (!user) {
				throw new Error("User not found");
			}

			// Remember the token hash -> user mapping for later requests.
			await collections.tokenCaches.insertOne({
				tokenHash: hash,
				userId: data.id,
				createdAt: new Date(),
				updatedAt: new Date(),
			});

			return {
				user,
				sessionId,
				secretSessionId,
				token,
				isAdmin: user.isAdmin || adminTokenManager.isAdmin(sessionId),
			};
		}
	}

	// Generate new session if none exists
	secretSessionId = crypto.randomUUID();
	sessionId = await sha256(secretSessionId);

	if (await collections.sessions.findOne({ sessionId })) {
		throw new Error("Session ID collision");
	}

	return { user: undefined, sessionId, secretSessionId, isAdmin: false };
}

/**
 * Start the OIDC login flow: compute the redirect URI and a safe return path, then redirect
 * (302) to the provider's authorization URL.
 */
export async function triggerOauthFlow({ url, locals, cookies }: RequestEvent): Promise {
	// const referer = request.headers.get("referer");
	// let redirectURI = `${(referer ?
new URL(referer) : url).origin}${base}/login/callback`; let redirectURI = `${url.origin}${base}/login/callback`; // TODO: Handle errors if provider is not responding if (url.searchParams.has("callback")) { const callback = url.searchParams.get("callback") || redirectURI; if (config.ALTERNATIVE_REDIRECT_URLS.includes(callback)) { redirectURI = callback; } } // Preserve a safe in-app return path after login. // Priority: explicit ?next=... (must be an absolute path), else the current path (when auto-login kicks in). let next: string | undefined = undefined; const nextParam = sanitizeReturnPath(url.searchParams.get("next")); if (nextParam) { // Only accept absolute in-app paths to prevent open redirects next = nextParam; } else if (!url.pathname.startsWith(`${base}/login`)) { // For automatic login on protected pages, return to the page the user was on next = sanitizeReturnPath(`${url.pathname}${url.search}`) ?? `${base}/`; } else { next = sanitizeReturnPath(`${base}/`) ?? "/"; } const authorizationUrl = await getOIDCAuthorizationUrl( { redirectURI }, { sessionId: locals.sessionId, next, url, cookies } ); throw redirect(302, authorizationUrl); } ================================================ FILE: src/lib/server/config.ts ================================================ import { env as publicEnv } from "$env/dynamic/public"; import { env as serverEnv } from "$env/dynamic/private"; import { building } from "$app/environment"; import type { Collection } from "mongodb"; import type { ConfigKey as ConfigKeyType } from "$lib/types/ConfigKey"; import type { Semaphore } from "$lib/types/Semaphore"; import { Semaphores } from "$lib/types/Semaphore"; export type PublicConfigKey = keyof typeof publicEnv; const keysFromEnv = { ...publicEnv, ...serverEnv }; export type ConfigKey = keyof typeof keysFromEnv; class ConfigManager { private keysFromDB: Partial> = {}; private isInitialized = false; private configCollection: Collection | undefined; private semaphoreCollection: 
Collection | undefined; private lastConfigUpdate: Date | undefined; async init() { if (this.isInitialized) return; if (building || import.meta.env.MODE === "test") { this.isInitialized = true; return; } const { getCollectionsEarly } = await import("./database"); const collections = await getCollectionsEarly(); this.configCollection = collections.config; this.semaphoreCollection = collections.semaphores; await this.checkForUpdates().then(() => { this.isInitialized = true; }); } get ConfigManagerEnabled() { return serverEnv.ENABLE_CONFIG_MANAGER === "true" && import.meta.env.MODE !== "test"; } get isHuggingChat() { return this.get("PUBLIC_APP_ASSETS") === "huggingchat"; } async checkForUpdates() { if (await this.isConfigStale()) { await this.updateConfig(); } } async isConfigStale(): Promise { if (!this.lastConfigUpdate || !this.isInitialized) { return true; } const count = await this.semaphoreCollection?.countDocuments({ key: Semaphores.CONFIG_UPDATE, updatedAt: { $gt: this.lastConfigUpdate }, }); return count !== undefined && count > 0; } async updateConfig() { const configs = (await this.configCollection?.find({}).toArray()) ?? 
[]; this.keysFromDB = configs.reduce( (acc, curr) => { acc[curr.key as ConfigKey] = curr.value; return acc; }, {} as Record ); this.lastConfigUpdate = new Date(); } get(key: ConfigKey): string { if (!this.ConfigManagerEnabled) { return keysFromEnv[key] || ""; } return this.keysFromDB[key] || keysFromEnv[key] || ""; } async updateSemaphore() { await this.semaphoreCollection?.updateOne( { key: Semaphores.CONFIG_UPDATE }, { $set: { updatedAt: new Date(), }, $setOnInsert: { createdAt: new Date(), }, }, { upsert: true } ); } async set(key: ConfigKey, value: string) { if (!this.ConfigManagerEnabled) throw new Error("Config manager is disabled"); await this.configCollection?.updateOne({ key }, { $set: { value } }, { upsert: true }); this.keysFromDB[key] = value; await this.updateSemaphore(); } async delete(key: ConfigKey) { if (!this.ConfigManagerEnabled) throw new Error("Config manager is disabled"); await this.configCollection?.deleteOne({ key }); delete this.keysFromDB[key]; await this.updateSemaphore(); } async clear() { if (!this.ConfigManagerEnabled) throw new Error("Config manager is disabled"); await this.configCollection?.deleteMany({}); this.keysFromDB = {}; await this.updateSemaphore(); } getPublicConfig() { let config = { ...Object.fromEntries( Object.entries(keysFromEnv).filter(([key]) => key.startsWith("PUBLIC_")) ), } as Record; if (this.ConfigManagerEnabled) { config = { ...config, ...Object.fromEntries( Object.entries(this.keysFromDB).filter(([key]) => key.startsWith("PUBLIC_")) ), }; } const publicEnvKeys = Object.keys(publicEnv); return Object.fromEntries( Object.entries(config).filter(([key]) => publicEnvKeys.includes(key)) ) as Record; } } // Create the instance and initialize it. 
const configManager = new ConfigManager();

/** Resolves once `configManager.init()` has completed; initialization is skipped while building. */
export const ready = (async () => {
	if (!building) {
		await configManager.init();
	}
})();

/** Additional keys exposed as string properties on the `config` proxy type, on top of `ConfigKey`. */
type ExtraConfigKeys =
	| "HF_TOKEN"
	| "OLD_MODELS"
	| "ENABLE_ASSISTANTS"
	| "METRICS_ENABLED"
	| "METRICS_PORT"
	| "MCP_SERVERS"
	| "MCP_FORWARD_HF_USER_TOKEN"
	| "MCP_TOOL_TIMEOUT_MS"
	| "EXA_API_KEY";

/** The manager itself plus one string property per known configuration key. */
type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };

/**
 * Proxy in front of the config manager.
 *
 * - Reading a property that exists on the manager returns that member unchanged.
 * - Reading any other string property is forwarded to `configManager.get(prop)`.
 * - Writing a property that exists on the manager sets it on the manager.
 * - Writing any other string property is forwarded to `configManager.set(prop, value)`.
 */
export const config: ConfigProxy = new Proxy(configManager, {
	get(target, prop, receiver) {
		if (prop in target) {
			return Reflect.get(target, prop, receiver);
		}
		if (typeof prop === "string") {
			return target.get(prop as ConfigKey);
		}
		return undefined;
	},

	set(target, prop, value, receiver) {
		if (prop in target) {
			return Reflect.set(target, prop, value, receiver);
		}
		if (typeof prop === "string") {
			// `ConfigManager.set` is async and a proxy `set` trap cannot await it.
			// Attach a rejection handler so a failed write (for instance when the
			// config manager is disabled) is reported instead of surfacing as an
			// unhandled promise rejection.
			target.set(prop as ConfigKey, value).catch((err) => {
				console.error(`Failed to persist config key "${prop}"`, err);
			});
			return true;
		}
		return false;
	},
}) as ConfigProxy;

================================================ FILE: src/lib/server/conversation.ts ================================================
import { collections } from "$lib/server/database";
import { MetricsServer } from "$lib/server/metrics";
import { error } from "@sveltejs/kit";
import { ObjectId } from "mongodb";
import { authCondition } from "$lib/server/auth";

/**
 * Create a new conversation from a shared conversation ID.
 * If the conversation already exists for the user/session, return the existing conversation ID.
 * returns the conversation ID.
*/
export async function createConversationFromShare(
	fromShareId: string,
	locals: App.Locals,
	userAgent?: string
): Promise {
	const conversation = await collections.sharedConversations.findOne({
		_id: fromShareId,
	});

	if (!conversation) {
		error(404, "Conversation not found");
	}

	// Check if shared conversation exists already for this user/session
	const existingConversation = await collections.conversations.findOne({
		"meta.fromShareId": fromShareId,
		...authCondition(locals),
	});

	if (existingConversation) {
		return existingConversation._id.toString();
	}

	// Create new conversation from shared conversation
	const res = await collections.conversations.insertOne({
		_id: new ObjectId(),
		// Strip <think> / </think> tags from the copied title
		title: conversation.title.replace(/<\/?think>/gi, "").trim(),
		rootMessageId: conversation.rootMessageId,
		messages: conversation.messages,
		model: conversation.model,
		preprompt: conversation.preprompt,
		createdAt: new Date(),
		updatedAt: new Date(),
		userAgent,
		// Owned by the logged-in user when there is one, otherwise by the session
		...(locals.user ? { userId: locals.user._id } : { sessionId: locals.sessionId }),
		meta: { fromShareId },
	});

	// Copy files from shared conversation bucket entries to the new conversation
	// Shared files are stored with filenames "${sharedId}-${sha}" and metadata.conversation = sharedId
	// New conversation expects files to be stored under its own id prefix
	const newConvId = res.insertedId.toString();
	const sharedId = fromShareId;
	// Escape regex metacharacters so the id is only ever matched literally as a filename prefix
	const sharedIdPattern = sharedId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const files = await collections.bucket
		.find({ filename: { $regex: `^${sharedIdPattern}-` } })
		.toArray();

	await Promise.all(
		files.map(
			(file) =>
				new Promise((resolve, reject) => {
					try {
						const newFilename = file.filename.replace(`${sharedId}-`, `${newConvId}-`);
						const downloadStream = collections.bucket.openDownloadStream(file._id);
						const uploadStream = collections.bucket.openUploadStream(newFilename, {
							metadata: { ...file.metadata, conversation: newConvId },
						});
						// Stream the stored bytes straight into the new entry; settle on the
						// first error from either side or when the upload finishes.
						downloadStream
							.on("error", reject)
							.pipe(uploadStream)
							.on("error", reject)
							.on("finish", () => resolve());
					} catch (e) {
						reject(e);
					}
				})
		)
	);

	if (MetricsServer.isEnabled()) {
		MetricsServer.getMetrics().model.conversationsTotal.inc({ model: conversation.model });
	}
	return res.insertedId.toString();
}

================================================ FILE: src/lib/server/database.ts ================================================
import { GridFSBucket, MongoClient, ReadPreference } from "mongodb";
import type { Conversation } from "$lib/types/Conversation";
import type { SharedConversation } from "$lib/types/SharedConversation";
import type { AbortedGeneration } from "$lib/types/AbortedGeneration";
import type { Settings } from "$lib/types/Settings";
import type { User } from "$lib/types/User";
import type { MessageEvent } from "$lib/types/MessageEvent";
import type { Session } from "$lib/types/Session";
import type { Assistant } from "$lib/types/Assistant";
import type { Report } from "$lib/types/Report";
import type { ConversationStats } from "$lib/types/ConversationStats";
import type { MigrationResult } from "$lib/types/MigrationResult";
import type { Semaphore } from "$lib/types/Semaphore";
import type { AssistantStats } from "$lib/types/AssistantStats";
import { MongoMemoryServer } from "mongodb-memory-server";
import { logger } from "$lib/server/logger";
import { building } from "$app/environment";
import type { TokenCache } from "$lib/types/TokenCache";
import { onExit } from "./exitHandler";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
import { existsSync, mkdirSync } from "fs";
import { findRepoRoot } from "./findRepoRoot";
import type { ConfigKey } from "$lib/types/ConfigKey";
import { config } from "$lib/server/config";

export const CONVERSATION_STATS_COLLECTION = "conversations.stats";

/**
 * Owns the MongoDB client used by the server.
 *
 * When `MONGODB_URL` is empty, a `MongoMemoryServer` is started instead and its
 * data is stored under `MONGO_STORAGE_PATH` (or `<repo root>/db`).
 * Obtain the shared instance through `Database.getInstance()`.
 */
export class Database {
	private client?: MongoClient;
	private mongoServer?: MongoMemoryServer;

	private static instance: Database;

	/**
	 * Creates the client, connects, triggers index creation and registers the
	 * shutdown hook. Exits the process with code 1 when connecting fails.
	 */
	private async init() {
		const DB_FOLDER =
			config.MONGO_STORAGE_PATH ||
			join(findRepoRoot(dirname(fileURLToPath(import.meta.url))), "db");

		if (!config.MONGODB_URL) {
			logger.warn("No MongoDB URL found, using in-memory server");

			logger.info(`Using database path: ${DB_FOLDER}`);
			// Create db directory if it doesn't exist
			if (!existsSync(DB_FOLDER)) {
				logger.info(`Creating database directory at ${DB_FOLDER}`);
				mkdirSync(DB_FOLDER, { recursive: true });
			}

			this.mongoServer = await MongoMemoryServer.create({
				instance: {
					dbName: config.MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""),
					dbPath: DB_FOLDER,
				},
				binary: {
					version: "7.0.18",
				},
			});
			this.client = new MongoClient(this.mongoServer.getUri(), {
				directConnection: config.MONGODB_DIRECT_CONNECTION === "true",
			});
		} else {
			this.client = new MongoClient(config.MONGODB_URL, {
				directConnection: config.MONGODB_DIRECT_CONNECTION === "true",
			});
		}

		try {
			logger.info("Connecting to database");
			await this.client.connect();
			logger.info("Connected to database");
			// NOTE(review): the returned Db handle is not used here; getCollections() builds its own.
			this.client.db(config.MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : ""));
			await this.initDatabase();
		} catch (err) {
			logger.error(err, "Error connecting to database");
			process.exit(1);
		}

		// Disconnect DB on exit
		onExit(async () => {
			logger.info("Closing database connection");
			await this.client?.close(true);
			await this.mongoServer?.stop();
		});
	}

	/**
	 * Returns the shared instance, creating and initializing it on first use.
	 *
	 * NOTE(review): `instance` is assigned before `init()` resolves, so a second
	 * caller arriving during initialization would receive an instance that is
	 * not connected yet — confirm that `ready` below is the only early caller.
	 */
	public static async getInstance(): Promise {
		if (!Database.instance) {
			Database.instance = new Database();
			await Database.instance.init();
		}

		return Database.instance;
	}

	/**
	 * Return mongoClient
	 * @throws Error when the client has not been created yet
	 */
	public getClient(): MongoClient {
		if (!this.client) {
			throw new Error("Database not initialized");
		}

		return this.client;
	}

	/**
	 * Return map of database's collections
	 * @throws Error when the client has not been created yet
	 */
	public getCollections() {
		if (!this.client) {
			throw new Error("Database not initialized");
		}

		const db = this.client.db(
			config.MONGODB_DB_NAME + (import.meta.env.MODE === "test" ? "-test" : "")
		);

		// Collections with default readPreference (primary) - critical for read-after-write consistency
		const conversations = db.collection("conversations");
		const settings = db.collection("settings");
		const users = db.collection("users");
		const sessions = db.collection("sessions");
		const messageEvents = db.collection("messageEvents");
		const abortedGenerations = db.collection("abortedGenerations");
		const semaphores = db.collection("semaphores");
		// Exposed as `tokenCaches`, stored in the "tokens" collection
		const tokenCaches = db.collection("tokens");
		const configCollection = db.collection("config");
		const migrationResults = db.collection("migrationResults");
		const sharedConversations = db.collection("sharedConversations");
		const bucket = new GridFSBucket(db, { bucketName: "files" });

		// Collections with secondaryPreferred - heavy reads, can tolerate slight replication lag
		const secondaryPreferred = ReadPreference.SECONDARY_PREFERRED;
		const assistants = db.collection("assistants", {
			readPreference: secondaryPreferred,
		});
		const assistantStats = db.collection("assistants.stats", {
			readPreference: secondaryPreferred,
		});
		const conversationStats = db.collection(CONVERSATION_STATS_COLLECTION, {
			readPreference: secondaryPreferred,
		});
		const reports = db.collection("reports", {
			readPreference: secondaryPreferred,
		});
		const tools = db.collection("tools", {
			readPreference: secondaryPreferred,
		});

		return {
			conversations,
			conversationStats,
			assistants,
			assistantStats,
			reports,
			sharedConversations,
			abortedGenerations,
			settings,
			users,
			sessions,
			messageEvents,
			bucket,
			migrationResults,
			semaphores,
			tokenCaches,
			tools,
			config: configCollection,
		};
	}

	/**
	 * Init database once connected: Index creation
	 *
	 * Every `createIndex` call is started without being awaited; a failure is
	 * only logged, so this method returns before the indexes necessarily exist.
	 * @private
	 */
	private initDatabase() {
		const {
			conversations,
			conversationStats,
			assistants,
			assistantStats,
			reports,
			sharedConversations,
			abortedGenerations,
			settings,
			users,
			sessions,
			messageEvents,
			semaphores,
			tokenCaches,
			config,
		} = this.getCollections();

		conversations
			.createIndex(
				{ sessionId: 1, updatedAt: -1 },
				{ partialFilterExpression: { sessionId: { $exists: true } } }
			)
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by sessionId and updatedAt")
			);
		conversations
			.createIndex(
				{ userId: 1, updatedAt: -1 },
				{ partialFilterExpression: { userId: { $exists: true } } }
			)
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by userId and updatedAt")
			);
		// NOTE(review): the keys below use the singular "message." prefix while other
		// indexes in this method use "messages." — confirm the intended field path
		// before changing it, since a different key creates a different index.
		conversations
			.createIndex(
				{ "message.id": 1, "message.ancestors": 1 },
				{ partialFilterExpression: { userId: { $exists: true } } }
			)
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by messageId and ancestors")
			);
		// Not strictly necessary, could use _id, but more convenient. Also for stats
		// To do stats on conversation messages
		conversations
			.createIndex({ "messages.createdAt": 1 }, { sparse: true })
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by messages createdAt")
			);
		// Unique index for stats
		conversationStats
			.createIndex(
				{
					type: 1,
					"date.field": 1,
					"date.span": 1,
					"date.at": 1,
					distinct: 1,
				},
				{ unique: true }
			)
			.catch((e) =>
				logger.error(
					e,
					"Error creating index for conversationStats by type, date.field and date.span"
				)
			);
		// Allow easy check of last computed stat for given type/dateField
		conversationStats
			.createIndex({
				type: 1,
				"date.field": 1,
				"date.at": 1,
			})
			.catch((e) =>
				logger.error(
					e,
					"Error creating index for conversationStats by type, date.field and date.at"
				)
			);
		abortedGenerations
			.createIndex({ updatedAt: 1 }, { expireAfterSeconds: 30 })
			.catch((e) =>
				logger.error(
					e,
					"Error creating index for abortedGenerations by updatedAt and expireAfterSeconds"
				)
			);
		abortedGenerations
			.createIndex({ conversationId: 1 }, { unique: true })
			.catch((e) =>
				logger.error(e, "Error creating index for abortedGenerations by conversationId")
			);
		sharedConversations
			.createIndex({ hash: 1 }, { unique: true })
			.catch((e) => logger.error(e, "Error creating index for sharedConversations by hash"));
		settings
			.createIndex({ sessionId: 1 }, { unique: true, sparse: true })
			.catch((e) => logger.error(e, "Error creating index for settings by sessionId"));
		settings
			.createIndex({ userId: 1 }, { unique: true, sparse: true })
			.catch((e) => logger.error(e, "Error creating index for settings by userId"));
		settings
			.createIndex({ assistants: 1 })
			.catch((e) => logger.error(e, "Error creating index for settings by assistants"));
		users
			.createIndex({ hfUserId: 1 }, { unique: true })
			.catch((e) => logger.error(e, "Error creating index for users by hfUserId"));
		users
			.createIndex({ sessionId: 1 }, { unique: true, sparse: true })
			.catch((e) => logger.error(e, "Error creating index for users by sessionId"));
		// No unicity because due to renames & outdated info from oauth provider, there may be the same username on different users
		users
			.createIndex({ username: 1 })
			.catch((e) => logger.error(e, "Error creating index for users by username"));
		// For stats queries filtering users by creation date
		users
			.createIndex({ createdAt: 1 })
			.catch((e) => logger.error(e, "Error creating index for users by createdAt"));
		messageEvents
			.createIndex({ expiresAt: 1 }, { expireAfterSeconds: 1 })
			.catch((e) => logger.error(e, "Error creating index for messageEvents by expiresAt"));
		sessions
			.createIndex({ expiresAt: 1 }, { expireAfterSeconds: 0 })
			.catch((e) => logger.error(e, "Error creating index for sessions by expiresAt"));
		sessions
			.createIndex({ sessionId: 1 }, { unique: true })
			.catch((e) => logger.error(e, "Error creating index for sessions by sessionId"));
		assistants
			.createIndex({ createdById: 1, userCount: -1 })
			.catch((e) =>
				logger.error(e, "Error creating index for assistants by createdById and userCount")
			);
		assistants
			.createIndex({ userCount: 1 })
			.catch((e) => logger.error(e, "Error creating index for assistants by userCount"));
		assistants
			.createIndex({ review: 1, userCount: -1 })
			.catch((e) => logger.error(e, "Error creating index for assistants by review and userCount"));
		assistants
			.createIndex({ modelId: 1, userCount: -1 })
			.catch((e) =>
				logger.error(e, "Error creating index for assistants by modelId and userCount")
			);
		assistants
			.createIndex({ searchTokens: 1 })
			.catch((e) => logger.error(e, "Error creating index for assistants by searchTokens"));
		assistants
			.createIndex({ last24HoursCount: 1 })
			.catch((e) => logger.error(e, "Error creating index for assistants by last24HoursCount"));
		assistants
			.createIndex({ last24HoursUseCount: -1, useCount: -1, _id: 1 })
			.catch((e) =>
				logger.error(e, "Error creating index for assistants by last24HoursUseCount and useCount")
			);
		assistantStats
			// Order of keys is important for the queries
			.createIndex({ "date.span": 1, "date.at": 1, assistantId: 1 }, { unique: true })
			.catch((e) =>
				logger.error(
					e,
					"Error creating index for assistantStats by date.span and date.at and assistantId"
				)
			);
		reports
			.createIndex({ assistantId: 1 })
			.catch((e) => logger.error(e, "Error creating index for reports by assistantId"));
		reports
			.createIndex({ createdBy: 1, assistantId: 1 })
			.catch((e) =>
				logger.error(e, "Error creating index for reports by createdBy and assistantId")
			);

		// Unique index for semaphore and migration results
		semaphores
			.createIndex({ key: 1 }, { unique: true })
			.catch((e) => logger.error(e, "Error creating index for semaphores by key"));
		semaphores
			.createIndex({ deleteAt: 1 }, { expireAfterSeconds: 1 })
			.catch((e) => logger.error(e, "Error creating index for semaphores by deleteAt"));
		tokenCaches
			.createIndex({ createdAt: 1 }, { expireAfterSeconds: 5 * 60 })
			.catch((e) => logger.error(e, "Error creating index for tokenCaches by createdAt"));
		tokenCaches
			.createIndex({ tokenHash: 1 })
			.catch((e) => logger.error(e, "Error creating index for tokenCaches by tokenHash"));
		// Tools removed: skipping tools indexes
		conversations
			.createIndex({
				"messages.from": 1,
				createdAt: 1,
			})
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by messages from and createdAt")
			);

		conversations
			.createIndex({
				userId: 1,
				sessionId: 1,
			})
			.catch((e) =>
				logger.error(e, "Error creating index for conversations by userId and sessionId")
			);

		// For stats aggregation jobs that filter by createdAt/updatedAt alone
		conversations
			.createIndex({ createdAt: 1 })
			.catch((e) => logger.error(e, "Error creating index for conversations by createdAt"));
		conversations
			.createIndex({ updatedAt: 1 })
			.catch((e) => logger.error(e, "Error creating index for conversations by updatedAt"));

		config
			.createIndex({ key: 1 }, { unique: true })
			.catch((e) => logger.error(e, "Error creating index for config by key"));
	}
}

/** Collection handles; assigned when `ready` settles (an empty placeholder object while building). */
export let collections: ReturnType;

/** Resolves once `collections` has been assigned. */
export const ready = (async () => {
	if (!building) {
		const db = await Database.getInstance();
		collections = db.getCollections();
	} else {
		collections = {} as unknown as ReturnType;
	}
})();

/**
 * Waits for `ready` and returns the collection handles.
 * @throws Error when `collections` is still unset after `ready` resolved
 */
export async function getCollectionsEarly(): Promise<ReturnType> {
	await ready;
	if (!collections) {
		throw new Error("Database not initialized");
	}
	return collections;
}

================================================ FILE: src/lib/server/endpoints/document.ts ================================================
import type { MessageFile } from "$lib/types/Message";
import { z } from "zod";

/** Settings consumed by `makeDocumentProcessor`. */
export interface FileProcessorOptions {
	supportedMimeTypes: TMimeType[];
	maxSizeInMB: number;
}
// Removed unused ImageProcessor type alias

/**
 * Builds a zod schema for document processor options in which every field,
 * and the object as a whole, falls back to the given defaults.
 */
export const createDocumentProcessorOptionsValidator = (
	defaults: FileProcessorOptions
) => {
	return z
		.object({
			supportedMimeTypes: z
				.array(
					z.enum([
						defaults.supportedMimeTypes[0],
						...defaults.supportedMimeTypes.slice(1),
					])
				)
				.default(defaults.supportedMimeTypes),
			maxSizeInMB: z.number().positive().default(defaults.maxSizeInMB),
		})
		.default(defaults);
};

// Removed unused DocumentProcessor type alias

export type AsyncDocumentProcessor = (
	file: MessageFile
) => Promise<{
	file: Buffer;
	mime: TMimeType;
}>;

/**
 * Returns a processor that decodes a base64 message file and validates it.
 *
 * The processor throws when the decoded payload is larger than
 * `maxSizeInMB * 1000 * 1000` bytes or when the file's mime type is not listed
 * in `supportedMimeTypes`; otherwise it resolves to the decoded buffer and mime.
 */
export function makeDocumentProcessor(
	options: FileProcessorOptions
): AsyncDocumentProcessor {
	return async (file) => {
		const { supportedMimeTypes, maxSizeInMB } = options;
		const { mime, value } = file;

		const buffer = Buffer.from(value, "base64");
		const tooLargeInBytes = buffer.byteLength > maxSizeInMB * 1000 * 1000;

		if (tooLargeInBytes) {
			throw Error("Document is too large");
		}

		const outputMime = validateMimeType(supportedMimeTypes, mime);

		return { file: buffer, mime: outputMime };
	};
}

/** Returns `mime` unchanged when it is listed in `supportedMimes`, throws otherwise. */
const validateMimeType = (
	supportedMimes: T,
	mime: string
): T[number] => {
	if (!supportedMimes.includes(mime)) {
		const supportedMimesStr = supportedMimes.join(", ");

		throw Error(`Mimetype "${mime}" not found in supported mimes: ${supportedMimesStr}`);
	}

	return mime;
};

================================================ FILE: src/lib/server/endpoints/endpoints.ts ================================================
import type { Conversation } from "$lib/types/Conversation";
import type { Message } from "$lib/types/Message";
import type {
	TextGenerationStreamOutput,
	TextGenerationStreamToken,
	InferenceProvider,
} from "@huggingface/inference";

import { z } from "zod";
import { endpointOAIParametersSchema, endpointOai } from "./openai/endpointOai";
import type { Model } from "$lib/types/Model";
import type { ObjectId } from "mongodb";

export type EndpointMessage = Omit;

// parameters passed when generating text
export interface EndpointParameters {
	messages: EndpointMessage[];
	preprompt?: Conversation["preprompt"];
	generateSettings?: Partial;
	isMultimodal?: boolean;
	conversationId?: ObjectId;
	locals: App.Locals | undefined;
	abortSignal?: AbortSignal;
	/** Inference provider preference: "auto", "fastest", "cheapest", or a specific provider name */
	provider?: string;
}

export type TextGenerationStreamOutputSimplified = TextGenerationStreamOutput & {
	token: TextGenerationStreamToken;
	routerMetadata?: { route?: string; model?: string; provider?: InferenceProvider };
};
// type signature for the endpoint
export type Endpoint = (
	params: EndpointParameters
) => Promise>;

// list of all endpoint generators
export const endpoints = {
	openai: endpointOai,
};

export const endpointSchema = z.discriminatedUnion("type", [endpointOAIParametersSchema]);
export default endpoints;

================================================ FILE:
src/lib/server/endpoints/images.ts ================================================
import type { Sharp } from "sharp";
import sharp from "sharp";
import type { MessageFile } from "$lib/types/Message";
import { z, type util } from "zod";

/** Settings consumed by `makeImageProcessor`. */
export interface ImageProcessorOptions {
	supportedMimeTypes: TMimeType[];
	preferredMimeType: TMimeType;
	maxSizeInMB: number;
	maxWidth: number;
	maxHeight: number;
}

/** Async function turning a message file into an encoded image buffer plus its mime type. */
export type ImageProcessor = (file: MessageFile) => Promise<{
	image: Buffer;
	mime: TMimeType;
}>;

/**
 * Builds a zod schema for image processor options. Each field, and the object
 * as a whole, falls back to the corresponding value in `defaults`.
 */
export function createImageProcessorOptionsValidator(
	defaults: ImageProcessorOptions
) {
	const optionsShape = {
		supportedMimeTypes: z
			.array(
				z.enum([
					defaults.supportedMimeTypes[0],
					...defaults.supportedMimeTypes.slice(1),
				])
			)
			.default(defaults.supportedMimeTypes),
		preferredMimeType: z
			.enum([defaults.supportedMimeTypes[0], ...defaults.supportedMimeTypes.slice(1)])
			.default(defaults.preferredMimeType as util.noUndefined),
		maxSizeInMB: z.number().positive().default(defaults.maxSizeInMB),
		maxWidth: z.number().int().positive().default(defaults.maxWidth),
		maxHeight: z.number().int().positive().default(defaults.maxHeight),
	};

	return z.object(optionsShape).default(defaults);
}

/**
 * Returns a processor that decodes a base64 image and, when needed, resizes
 * and/or re-encodes it so that it respects `options`.
 *
 * The processor throws when the image metadata or dimensions cannot be read,
 * and propagates any error raised while choosing the output format or size.
 */
export function makeImageProcessor(
	options: ImageProcessorOptions
): ImageProcessor {
	return async (file) => {
		const { supportedMimeTypes, preferredMimeType, maxSizeInMB, maxWidth, maxHeight } = options;
		const { mime: inputMime, value: base64Payload } = file;

		const inputBytes = Buffer.from(base64Payload, "base64");
		let pipeline = sharp(inputBytes);

		const info = await pipeline.metadata();
		if (!info) {
			throw Error("Failed to read image metadata");
		}

		const { width: sourceWidth, height: sourceHeight } = info;
		if (sourceWidth === undefined || sourceHeight === undefined) {
			throw Error("Failed to read image size");
		}

		const exceedsDimensions = sourceWidth > maxWidth || sourceHeight > maxHeight;
		const exceedsByteLimit = inputBytes.byteLength > maxSizeInMB * 1000 * 1000;

		const outputMime = chooseMimeType(supportedMimeTypes, preferredMimeType, inputMime, {
			preferSizeReduction: exceedsByteLimit,
		});

		// Scale down only when one of the limits is exceeded and the chosen size differs
		if (exceedsDimensions || exceedsByteLimit) {
			const target = chooseImageSize({
				mime: outputMime,
				width: sourceWidth,
				height: sourceHeight,
				maxWidth,
				maxHeight,
				maxSizeInMB,
			});
			const sameSize = target.width === sourceWidth && target.height === sourceHeight;
			if (!sameSize) {
				pipeline = resizeImage(pipeline, target.width, target.height);
			}
		}

		// Re-encode when the format changes. An image that was too large in bytes is
		// always re-encoded, so the encoder options assumed when choosing the image
		// size are the ones actually applied.
		const mustReencode = exceedsByteLimit || outputMime !== inputMime;
		if (mustReencode) {
			pipeline = convertImage(pipeline, outputMime);
		}

		const encoded = await pipeline.toBuffer();
		return { image: encoded, mime: outputMime };
	};
}

const outputFormats = ["png", "jpeg", "webp", "avif", "tiff", "gif"] as const;
type OutputImgFormat = (typeof outputFormats)[number];

/** Type guard: true when `format` is one of the entries of `outputFormats`. */
const isOutputFormat = (format: string): format is OutputImgFormat => {
	return outputFormats.includes(format as OutputImgFormat);
};

/**
 * Switches the sharp pipeline to the encoder named by an `image/<format>` mime type.
 * Throws for non-image mime types and for formats missing from `outputFormats`.
 */
export function convertImage(sharpInst: Sharp, outputMime: string): Sharp {
	const [family, subtype] = outputMime.split("/");

	if (family !== "image") {
		throw Error(`Requested non-image mime type: ${outputMime}`);
	}
	if (isOutputFormat(subtype)) {
		return sharpInst[subtype]();
	}

	throw Error(`Requested to convert to an unsupported format: ${subtype}`);
}

// heic/heif requires proprietary license
// TODO: blocking heif may be incorrect considering it also supports av1, so we should instead
// detect the compression method used via sharp().metadata().compression
// TODO: consider what to do about animated formats: apng, gif, animated webp, ...
const blocklistedMimes = ["image/heic", "image/heif"];

/** Sorted from largest to smallest */
const mimesBySizeDesc = [
	"image/png",
	"image/tiff",
	"image/gif",
	"image/jpeg",
	"image/webp",
	"image/avif",
];

/**
 * Chooses the mime type the processed image is encoded as.
 *
 * - The input mime is kept when it is supported and no size reduction is required.
 * - Otherwise, without size pressure, the preferred mime is used.
 * - When `preferSizeReduction` is true, the smallest supported format (according to
 *   `mimesBySizeDesc`) is used, falling back to the preferred mime when none of the
 *   supported formats is listed there.
 *
 * Throws when the preferred mime is not part of `supportedMimes`, when `mime` is not an
 * `image/*` type, or when `mime` is blocklisted and cannot be passed through as-is.
 **/
function chooseMimeType(
	supportedMimes: T,
	preferredMime: string,
	mime: string,
	{ preferSizeReduction }: { preferSizeReduction: boolean }
): T[number] {
	if (!supportedMimes.includes(preferredMime)) {
		const supportedMimesStr = supportedMimes.join(", ");
		throw Error(
			`Preferred format "${preferredMime}" not found in supported mimes: ${supportedMimesStr}`
		);
	}

	const [type] = mime.split("/");
	if (type !== "image") throw Error(`Received non-image mime type: ${mime}`);

	if (supportedMimes.includes(mime) && !preferSizeReduction) return mime;

	if (blocklistedMimes.includes(mime)) throw Error(`Received blocklisted mime type: ${mime}`);

	// Without size pressure the configured preference wins; previously the smallest supported
	// format was always returned here, so the preferred mime was effectively ignored.
	if (!preferSizeReduction) return preferredMime;

	const smallestMime = mimesBySizeDesc.findLast((m) => supportedMimes.includes(m));
	return smallestMime ?? preferredMime;
}

interface ImageSizeOptions {
	mime: string;
	width: number;
	height: number;
	maxWidth: number;
	maxHeight: number;
	maxSizeInMB: number;
}

/**
 * Picks output dimensions that fit within `maxWidth` x `maxHeight` and whose estimated
 * encoded size stays below `maxSizeInMB`.
 *
 * The image is first scaled down (never up) to fit the pixel limits, then both sides are
 * repeatedly divided by 1.1 until the estimate fits. Throws when a side reaches 1 pixel
 * before the estimate fits.
 */
export function chooseImageSize({
	mime,
	width,
	height,
	maxWidth,
	maxHeight,
	maxSizeInMB,
}: ImageSizeOptions): { width: number; height: number } {
	// 1 MB is counted as 1000 * 1000 bytes, the same unit makeImageProcessor uses to decide
	// that an input is too large; a 1024-based budget here would accept estimates that are
	// above that limit.
	const maxSizeInBytes = maxSizeInMB * 1000 * 1000;

	const biggestDiscrepency = Math.max(1, width / maxWidth, height / maxHeight);

	let selectedWidth = Math.ceil(width / biggestDiscrepency);
	let selectedHeight = Math.ceil(height / biggestDiscrepency);

	do {
		const estimatedSize = estimateImageSizeInBytes(mime, selectedWidth, selectedHeight);
		if (estimatedSize < maxSizeInBytes) {
			return { width: selectedWidth, height: selectedHeight };
		}
		selectedWidth = Math.floor(selectedWidth / 1.1);
		selectedHeight = Math.floor(selectedHeight / 1.1);
	} while (selectedWidth > 1 && selectedHeight > 1);

	throw Error(`Failed to resize image to fit within ${maxSizeInMB}MB`);
}

/** Assumed ratio between encoded size and raw 32-bit pixel data, per output format. */
const mimeToCompressionRatio: Record = {
	"image/png": 1 / 2,
	"image/jpeg": 1 / 10,
	"image/webp": 1 / 4,
	"image/avif": 1 / 5,
	"image/tiff": 1,
	"image/gif": 1 / 5,
};

/**
 * Guesses the size of an image in bytes based on its format and dimensions
 * Should guess the worst case
 * Throws when `mime` has no entry in `mimeToCompressionRatio`.
 **/
function estimateImageSizeInBytes(mime: string, width: number, height: number): number {
	const compressionRatio = mimeToCompressionRatio[mime];
	if (!compressionRatio) throw Error(`Unsupported image format: ${mime}`);

	const bitsPerPixel = 32; // Assuming 32-bit color depth for 8-bit R G B A
	const bytesPerPixel = bitsPerPixel / 8;
	const uncompressedSize = width * height * bytesPerPixel;

	return uncompressedSize * compressionRatio;
}

/** Resizes `sharpInst` to fit inside a `maxWidth` x `maxHeight` box (sharp `fit: "inside"`). */
export function resizeImage(sharpInst: Sharp, maxWidth: number, maxHeight: number): Sharp {
	return sharpInst.resize({ width: maxWidth, height: maxHeight, fit: "inside" });
}

================================================
FILE: src/lib/server/endpoints/openai/endpointOai.ts
================================================
import { z } from "zod";
import { openAICompletionToTextGenerationStream } from "./openAICompletionToTextGenerationStream";
import {
	openAIChatToTextGenerationSingle,
	openAIChatToTextGenerationStream,
} from "./openAIChatToTextGenerationStream";
import type { CompletionCreateParamsStreaming } from "openai/resources/completions";
import type {
	ChatCompletionCreateParamsNonStreaming,
	ChatCompletionCreateParamsStreaming,
} from "openai/resources/chat/completions";
import { buildPrompt } from "$lib/buildPrompt";
import { config } from "$lib/server/config";
import type { Endpoint } from "../endpoints";
import type OpenAI from "openai";
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
import { prepareMessagesWithFiles } from "$lib/server/textGeneration/utils/prepareFiles";
// uuid import removed (no tool call ids)

/**
 * Configuration accepted by the "openai" endpoint type.
 * Every field except `model` and `type` has a default or is optional.
 */
export const endpointOAIParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	model: z.any(),
	type: z.literal("openai"),
	baseURL: z.string().url().default("https://api.openai.com/v1"),
	// Canonical auth token is OPENAI_API_KEY; keep HF_TOKEN as legacy alias
	apiKey: z.string().default(config.OPENAI_API_KEY || config.HF_TOKEN || "sk-"),
	completion: z
		.union([z.literal("completions"), z.literal("chat_completions")])
		.default("chat_completions"),
	defaultHeaders: z.record(z.string()).optional(),
	defaultQuery: z.record(z.string()).optional(),
	extraBody: z.record(z.any()).optional(),
	multimodal: z
		.object({
			image: createImageProcessorOptionsValidator({
				supportedMimeTypes: [
					// Restrict to the most widely-supported formats
					"image/png",
					"image/jpeg",
				],
				preferredMimeType: "image/jpeg",
				maxSizeInMB: 1,
				maxWidth: 1024,
				maxHeight: 1024,
			}),
		})
		.default({}),
	/* enable use of max_completion_tokens in place of max_tokens */
	useCompletionTokens: z.boolean().default(false),
	streamingSupported: z.boolean().default(true),
});

/**
 * Builds a generation function backed by an OpenAI-compatible API.
 *
 * The input is validated with `endpointOAIParametersSchema`, the `openai` package is
 * imported lazily, and a client is created whose fetch wrapper records router/provider
 * response headers. Depending on `completion`, the returned function calls either the
 * legacy completions API or the chat completions API and adapts the result to a text
 * generation stream.
 *
 * Throws when the `openai` package cannot be imported or when `completion` has an
 * unexpected value.
 */
export async function endpointOai(
	input: z.input
): Promise {
	const {
		baseURL,
		apiKey,
		completion,
		model,
		defaultHeaders,
		defaultQuery,
		multimodal,
		extraBody,
		useCompletionTokens,
		streamingSupported,
	} = endpointOAIParametersSchema.parse(input);

	// Loaded dynamically so a missing package surfaces as a descriptive error
	let OpenAI;
	try {
		OpenAI = (await import("openai")).OpenAI;
	} catch (e) {
		throw new Error("Failed to import OpenAI", { cause: e });
	}

	// Store router metadata if captured
	// NOTE(review): this variable lives in the endpoint closure and is overwritten by every
	// response seen by customFetch, so concurrent generations through the same endpoint
	// instance could read each other's metadata — confirm whether that can happen.
	let routerMetadata: { route?: string; model?: string; provider?: string } = {};

	// Custom fetch wrapper to capture response headers for router metadata
	const customFetch = async (url: RequestInfo, init?: RequestInit): Promise => {
		const response = await fetch(url, init);

		// Capture router headers if present (fallback for non-streaming)
		const routeHeader = response.headers.get("X-Router-Route");
		const modelHeader = response.headers.get("X-Router-Model");
		const providerHeader = response.headers.get("x-inference-provider");

		if (routeHeader && modelHeader) {
			routerMetadata = {
				route: routeHeader,
				model: modelHeader,
				provider: providerHeader || undefined,
			};
		} else if (providerHeader) {
			// Even without router metadata, capture provider info
			routerMetadata = {
				provider: providerHeader,
			};
		}

		return response;
	};

	const openai = new OpenAI({
		apiKey: apiKey || "sk-",
		baseURL,
		defaultHeaders: {
			// The custom User-Agent is only added when the app is named "HuggingChat";
			// configured defaultHeaders are spread last and therefore take precedence.
			...(config.PUBLIC_APP_NAME === "HuggingChat" && { "User-Agent": "huggingchat" }),
			...defaultHeaders,
		},
		defaultQuery,
		fetch: customFetch,
	});

	const imageProcessor = makeImageProcessor(multimodal.image);

	if (completion === "completions") {
		// Legacy text completions: the conversation is flattened into a single prompt
		return async ({
			messages,
			preprompt,
			generateSettings,
			conversationId,
			locals,
			abortSignal,
			provider,
		}) => {
			const prompt = await buildPrompt({
				messages,
				preprompt,
				model,
			});

			// Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together")
			const baseModelId = model.id ?? model.name;
			const modelId = provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId;

			// Request-specific settings override the model defaults
			const parameters = { ...model.parameters, ...generateSettings };
			const body: CompletionCreateParamsStreaming = {
				model: modelId,
				prompt,
				stream: true,
				max_tokens: parameters?.max_tokens,
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};

			const openAICompletion = await openai.completions.create(body, {
				// extraBody keys are merged over the generated body
				body: { ...body, ...extraBody },
				headers: {
					"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
					"X-use-cache": "false",
					...(locals?.token ? { Authorization: `Bearer ${locals.token}` } : {}),
					// Bill to organization if configured
					...(locals?.billingOrganization ? { "X-HF-Bill-To": locals.billingOrganization } : {}),
				},
				signal: abortSignal,
			});

			return openAICompletionToTextGenerationStream(openAICompletion);
		};
	} else if (completion === "chat_completions") {
		return async ({
			messages,
			preprompt,
			generateSettings,
			conversationId,
			isMultimodal,
			locals,
			abortSignal,
			provider,
		}) => {
			// Format messages for the chat API, handling multimodal content if supported
			let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
				await prepareMessagesWithFiles(messages, imageProcessor, isMultimodal ?? model.multimodal);

			// Normalize preprompt and handle empty values
			const normalizedPreprompt = typeof preprompt === "string" ? preprompt.trim() : "";

			// Check if a system message already exists as the first message
			const hasSystemMessage = messagesOpenAI.length > 0 && messagesOpenAI[0]?.role === "system";

			if (hasSystemMessage) {
				// Prepend normalized preprompt to existing system content when non-empty
				if (normalizedPreprompt) {
					// Non-string system content is treated as empty here
					const userSystemPrompt =
						(typeof messagesOpenAI[0].content === "string"
							? (messagesOpenAI[0].content as string)
							: "") || "";
					messagesOpenAI[0].content =
						normalizedPreprompt + (userSystemPrompt ? "\n\n" + userSystemPrompt : "");
				}
			} else {
				// Insert a system message only if the preprompt is non-empty
				if (normalizedPreprompt) {
					messagesOpenAI = [{ role: "system", content: normalizedPreprompt }, ...messagesOpenAI];
				}
			}

			// Combine model defaults with request-specific parameters
			const parameters = { ...model.parameters, ...generateSettings };

			// Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together")
			const baseModelId = model.id ?? model.name;
			const modelId = provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId;

			const body = {
				model: modelId,
				messages: messagesOpenAI,
				stream: streamingSupported,
				// Support two different ways of specifying token limits depending on the model
				...(useCompletionTokens
					? { max_completion_tokens: parameters?.max_tokens }
					: { max_tokens: parameters?.max_tokens }),
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};

			// Handle both streaming and non-streaming responses with appropriate processors
			if (streamingSupported) {
				const openChatAICompletion = await openai.chat.completions.create(
					body as ChatCompletionCreateParamsStreaming,
					{
						body: { ...body, ...extraBody },
						headers: {
							"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
							"X-use-cache": "false",
							...(locals?.token ? { Authorization: `Bearer ${locals.token}` } : {}),
							// Bill to organization if configured
							...(locals?.billingOrganization
								? { "X-HF-Bill-To": locals.billingOrganization }
								: {}),
						},
						signal: abortSignal,
					}
				);
				// Router metadata is read lazily through the callback
				return openAIChatToTextGenerationStream(openChatAICompletion, () => routerMetadata);
			} else {
				const openChatAICompletion = await openai.chat.completions.create(
					body as ChatCompletionCreateParamsNonStreaming,
					{
						body: { ...body, ...extraBody },
						headers: {
							"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
							"X-use-cache": "false",
							...(locals?.token ?
{ Authorization: `Bearer ${locals.token}` } : {}),
							// Bill to organization if configured
							...(locals?.billingOrganization
								? { "X-HF-Bill-To": locals.billingOrganization }
								: {}),
						},
						signal: abortSignal,
					}
				);
				return openAIChatToTextGenerationSingle(openChatAICompletion, () => routerMetadata);
			}
		};
	} else {
		throw new Error("Invalid completion type");
	}
}

================================================
FILE: src/lib/server/endpoints/openai/openAIChatToTextGenerationStream.ts
================================================
import type { TextGenerationStreamOutput } from "@huggingface/inference";
import type OpenAI from "openai";
import type { Stream } from "openai/streaming";

// Delimiters wrapped around reasoning text so it can be told apart from the answer.
// They are spelled with unicode escapes ("\u003c" is the less-than sign, "\u003e" the
// greater-than sign) so that tools which strip markup-like text cannot silently turn
// them into empty strings.
const THINK_OPEN = "\u003cthink\u003e";
const THINK_CLOSE = "\u003c/think\u003e";

/**
 * Transform a stream of OpenAI.Chat.ChatCompletion into a stream of TextGenerationStreamOutput
 *
 * Reasoning deltas (`delta.reasoning` or `delta.reasoning_content`) are emitted inside a
 * think block: the opening tag is added before the first reasoning delta and the closing
 * tag before the first content delta that follows. Router metadata is yielded as a special
 * empty token, either from an `x-router-metadata` chunk or, at the end of the stream,
 * from `getRouterMetadata`.
 */
export async function* openAIChatToTextGenerationStream(
	completionStream: Stream,
	getRouterMetadata?: () => { route?: string; model?: string; provider?: string }
) {
	let generatedText = "";
	let tokenId = 0;
	let toolBuffer = ""; // legacy hack kept harmless
	let metadataYielded = false;
	let thinkOpen = false;

	for await (const completion of completionStream) {
		const retyped = completion as {
			"x-router-metadata"?: { route: string; model: string; provider?: string };
		};
		// Check if this chunk contains router metadata (first chunk from llm-router)
		if (!metadataYielded && retyped["x-router-metadata"]) {
			const metadata = retyped["x-router-metadata"];
			yield {
				token: {
					id: tokenId++,
					text: "",
					logprob: 0,
					special: true,
				},
				generated_text: null,
				details: null,
				routerMetadata: {
					route: metadata.route,
					model: metadata.model,
					provider: metadata.provider,
				},
			} as TextGenerationStreamOutput & {
				routerMetadata: { route: string; model: string; provider?: string };
			};
			metadataYielded = true;
			// Skip processing this chunk as content since it's just metadata
			if (
				!completion.choices ||
				completion.choices.length === 0 ||
				!completion.choices[0].delta?.content
			) {
				continue;
			}
		}
		const { choices } = completion;
		const delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
			reasoning?: string;
			reasoning_content?: string;
		} = choices?.[0]?.delta ?? {};
		const content: string = delta.content ?? "";
		const reasoning: string =
			typeof delta?.reasoning === "string"
				? (delta.reasoning as string)
				: typeof delta?.reasoning_content === "string"
					? (delta.reasoning_content as string)
					: "";
		const last = choices?.[0]?.finish_reason === "stop" || choices?.[0]?.finish_reason === "length";

		// if the last token is a stop and the tool buffer is not empty, yield it as a generated_text
		if (choices?.[0]?.finish_reason === "stop" && toolBuffer.length > 0) {
			yield {
				token: {
					id: tokenId++,
					special: true,
					logprob: 0,
					text: "",
				},
				generated_text: toolBuffer,
				details: null,
			} as TextGenerationStreamOutput;
			break;
		}

		// weird bug where the parameters are streamed in like this
		if (choices?.[0]?.delta?.tool_calls) {
			const calls = Array.isArray(choices[0].delta.tool_calls)
				? choices[0].delta.tool_calls
				: [choices[0].delta.tool_calls];

			if (
				calls.length === 1 &&
				calls[0].index === 0 &&
				calls[0].id === "" &&
				calls[0].type === "function" &&
				!!calls[0].function &&
				calls[0].function.name === null
			) {
				toolBuffer += calls[0].function.arguments;
				continue;
			}
		}

		let combined = "";
		if (reasoning && reasoning.length > 0) {
			if (!thinkOpen) {
				// First reasoning delta: open the think block
				combined += THINK_OPEN + reasoning;
				thinkOpen = true;
			} else {
				combined += reasoning;
			}
		}

		if (content && content.length > 0) {
			const trimmed = content.trim();
			// Allow think tags in content to pass through (for models like DeepSeek R1)
			if (thinkOpen && trimmed === THINK_CLOSE) {
				// close once without duplicating the tag
				combined += THINK_CLOSE;
				thinkOpen = false;
			} else if (thinkOpen) {
				// First answer text after reasoning: close the think block before it
				combined += THINK_CLOSE + content;
				thinkOpen = false;
			} else {
				combined += content;
			}
		}

		// Accumulate the combined token into the full text
		generatedText += combined;
		const output: TextGenerationStreamOutput = {
			token: {
				id: tokenId++,
				text: combined,
				logprob: 0,
				special: last,
			},
			generated_text: last ? generatedText : null,
			details: null,
		};
		yield output;

		// Tools removed: ignore tool_calls deltas
	}

	// If metadata wasn't yielded from chunks (e.g., from headers), yield it at the end
	if (!metadataYielded && getRouterMetadata) {
		const routerMetadata = getRouterMetadata();
		// Yield if we have either complete router metadata OR just provider info
		if (
			(routerMetadata && routerMetadata.route && routerMetadata.model) ||
			routerMetadata?.provider
		) {
			yield {
				token: {
					id: tokenId++,
					text: "",
					logprob: 0,
					special: true,
				},
				generated_text: null,
				details: null,
				routerMetadata,
			} as TextGenerationStreamOutput & {
				routerMetadata: { route?: string; model?: string; provider?: string };
			};
		}
	}
}

/**
 * Transform a non-streaming OpenAI chat completion into a stream of TextGenerationStreamOutput
 *
 * Yields exactly one token holding the whole message. When the message carries reasoning
 * text, it is placed in a think block in front of the content.
 */
export async function* openAIChatToTextGenerationSingle(
	completion: OpenAI.Chat.Completions.ChatCompletion,
	getRouterMetadata?: () => { route?: string; model?: string; provider?: string }
) {
	const message: NonNullable["message"] & {
		reasoning?: string;
		reasoning_content?: string;
	} = completion.choices?.[0]?.message ?? {};
	let content: string = message?.content || "";
	// Provider-dependent reasoning shapes (non-streaming)
	const r: string =
		typeof message?.reasoning === "string"
			? (message.reasoning as string)
			: typeof message?.reasoning_content === "string"
				? (message.reasoning_content as string)
				: "";
	if (r && r.length > 0) {
		content = `${THINK_OPEN}${r}${THINK_CLOSE}` + content;
	}
	const tokenId = 0;

	// Yield the content as a single token
	yield {
		token: {
			id: tokenId,
			text: content,
			logprob: 0,
			special: false,
		},
		generated_text: content,
		details: null,
		// Attach router metadata only when it is complete or at least names a provider
		...(getRouterMetadata
			? (() => {
					const metadata = getRouterMetadata();
					return (metadata && metadata.route && metadata.model) || metadata?.provider
						? { routerMetadata: metadata }
						: {};
				})()
			: {}),
	} as TextGenerationStreamOutput & {
		routerMetadata?: { route?: string; model?: string; provider?: string };
	};
}

================================================
FILE: src/lib/server/endpoints/openai/openAICompletionToTextGenerationStream.ts
================================================
import type { TextGenerationStreamOutput } from "@huggingface/inference";
import type OpenAI from "openai";
import type { Stream } from "openai/streaming";

/**
 * Transform a stream of OpenAI.Completions.Completion into a stream of TextGenerationStreamOutput
 *
 * The chunk whose finish reason is "stop" or "length" is marked special and carries the
 * accumulated text in `generated_text`.
 */
export async function* openAICompletionToTextGenerationStream(
	completionStream: Stream
) {
	let generatedText = "";
	let tokenId = 0;
	for await (const completion of completionStream) {
		const { choices } = completion;
		const text = choices?.[0]?.text ?? "";
		const last = choices?.[0]?.finish_reason === "stop" || choices?.[0]?.finish_reason === "length";
		if (text) {
			generatedText = generatedText + text;
		}
		const output: TextGenerationStreamOutput = {
			token: {
				id: tokenId++,
				text,
				logprob: 0,
				special: last,
			},
			generated_text: last ? generatedText : null,
			details: null,
		};
		yield output;
	}
}

================================================
FILE: src/lib/server/endpoints/preprocessMessages.ts
================================================
import type { Message } from "$lib/types/Message";
import type { EndpointMessage } from "./endpoints";
import { downloadFile } from "../files/downloadFile";
import type { ObjectId } from "mongodb";

/**
 * Prepares stored messages for an endpoint: downloads their files, inlines clipboard
 * files into the message content, then drops an empty leading system message.
 */
export async function preprocessMessages(
	messages: Message[],
	convId: ObjectId
): Promise {
	return Promise.resolve(messages)
		.then((msgs) => downloadFiles(msgs, convId))
		.then((msgs) => injectClipboardFiles(msgs))
		.then(stripEmptyInitialSystemMessage);
}

async function downloadFiles(messages: Message[], convId: ObjectId): Promise {
	return Promise.all(
		messages.map>((message) =>
			Promise.all((message.files ??
[]).map((file) => downloadFile(file.value, convId))).then(
				(files) => ({ ...message, files })
			)
		)
	);
}

/**
 * Moves clipboard files into the message text.
 * Files with mime "application/vnd.chatui.clipboard" are decoded from base64 to UTF-8,
 * prepended to the message content, and removed from the file list. Messages without
 * such files are returned unchanged.
 */
async function injectClipboardFiles(messages: EndpointMessage[]) {
	return Promise.all(
		messages.map((message) => {
			const plaintextFiles = message.files
				?.filter((file) => file.mime === "application/vnd.chatui.clipboard")
				.map((file) => Buffer.from(file.value, "base64").toString("utf-8"));

			if (!plaintextFiles || plaintextFiles.length === 0) return message;

			return {
				...message,
				content: `${plaintextFiles.join("\n\n")}\n\n${message.content}`,
				files: message.files?.filter((file) => file.mime !== "application/vnd.chatui.clipboard"),
			};
		})
	);
}

/**
 * Remove an initial system message if its content is empty/whitespace only.
 * This prevents sending an empty system prompt to any provider.
 */
function stripEmptyInitialSystemMessage(messages: EndpointMessage[]): EndpointMessage[] {
	if (!messages?.length) return messages;
	const first = messages[0];
	if (first?.from !== "system") return messages;

	// Non-string content is never considered empty
	const content = first?.content as unknown;
	const isEmpty = typeof content === "string" ? content.trim().length === 0 : false;

	if (isEmpty) {
		return messages.slice(1);
	}

	return messages;
}

================================================
FILE: src/lib/server/exitHandler.ts
================================================
import { randomUUID } from "$lib/utils/randomUuid";
import { timeout } from "$lib/utils/timeout";
import { logger } from "./logger";

type ExitHandler = () => void | Promise;
type ExitHandlerUnsubscribe = () => void;

const listeners = new Map();

/** Registers `cb` to run on shutdown and returns a function that unregisters it. */
export function onExit(cb: ExitHandler): ExitHandlerUnsubscribe {
	const uuid = randomUUID();
	listeners.set(uuid, cb);
	return () => {
		listeners.delete(uuid);
	};
}

/** Runs one exit handler with a 30 second limit; failures are logged, never rethrown. */
async function runExitHandler(handler: ExitHandler): Promise {
	return timeout(Promise.resolve().then(handler), 30_000).catch((err) => {
		logger.error(err, "Exit handler failed to run");
	});
}

/**
 * Installs the SIGINT and SIGTERM handlers.
 * The first signal runs every registered exit handler; a second signal, or a failure
 * while running the handlers, kills the process with SIGKILL.
 */
export function initExitHandler() {
	let signalCount = 0;

	const exitHandler = async () => {
		if (signalCount === 1) {
			logger.info("Received signal... Exiting");
			await Promise.all(Array.from(listeners.values()).map(runExitHandler));
			logger.info("All exit handlers ran... Waiting for svelte server to exit");
		}
	};

	// Both signals share the same logic; only the log message names the signal
	for (const signal of ["SIGINT", "SIGTERM"] as const) {
		process.on(signal, () => {
			signalCount++;

			if (signalCount >= 2) {
				process.kill(process.pid, "SIGKILL");
			} else {
				exitHandler().catch((err) => {
					logger.error(err, `Error in exit handler on ${signal}:`);
					process.kill(process.pid, "SIGKILL");
				});
			}
		});
	}
}

================================================
FILE: src/lib/server/files/downloadFile.ts
================================================
import { error } from "@sveltejs/kit";
import { collections } from "$lib/server/database";
import type { Conversation } from "$lib/types/Conversation";
import type { SharedConversation } from "$lib/types/SharedConversation";
import type { MessageFile } from "$lib/types/Message";

/**
 * Loads the file stored under `${convId}-${sha256}` from the bucket and returns it
 * base64-encoded together with its stored mime type.
 * Fails with a 404 when the file does not exist and with a 403 when its metadata names a
 * different conversation.
 */
export async function downloadFile(
	sha256: string,
	convId: Conversation["_id"] | SharedConversation["_id"]
): Promise {
	const fileId = collections.bucket.find({ filename: `${convId.toString()}-${sha256}` });

	const file = await fileId.next();

	if (!file) {
		error(404, "File not found");
	}
	if (file.metadata?.conversation !== convId.toString()) {
		error(403, "You don't have access to this file.");
	}

	const mime = file.metadata?.mime;
	const name = file.filename;

	const fileStream = collections.bucket.openDownloadStream(file._id);

	// Collect the whole download stream in memory
	const buffer = await new Promise((resolve, reject) => {
		const chunks: Uint8Array[] = [];
		fileStream.on("data", (chunk) => chunks.push(chunk));
		fileStream.on("error", reject);
		fileStream.on("end", () => resolve(Buffer.concat(chunks)));
	});

	return { type: "base64", name, value: buffer.toString("base64"), mime };
}

================================================
FILE: src/lib/server/files/uploadFile.ts
================================================
import type { Conversation } from "$lib/types/Conversation";
import type { MessageFile } from "$lib/types/Message";
import { sha256 } from "$lib/utils/sha256";
import { fileTypeFromBuffer } from "file-type";
import { collections } from "$lib/server/database";

/**
 * Stores `file` in the bucket under `${conv._id}-${sha}` and resolves with a "hash"
 * reference once the upload stream finishes.
 * Rejects when the stream errors or when it has not finished after 20 seconds.
 */
export async function uploadFile(file: File, conv: Conversation): Promise {
	const sha = await sha256(await file.text());
	const buffer = await file.arrayBuffer();

	// Attempt to detect the mime type of the file, fallback to the uploaded mime
	const mime = await fileTypeFromBuffer(buffer).then((fileType) => fileType?.mime ?? file.type);

	const upload = collections.bucket.openUploadStream(`${conv._id}-${sha}`, {
		metadata: { conversation: conv._id.toString(), mime },
	});

	// Reuse the bytes that were already read and hand the stream a real Buffer view of them
	upload.write(Buffer.from(buffer));
	upload.end();

	// only return the filename when upload throws a finish event or a 20s time out occurs
	return new Promise((resolve, reject) => {
		// Cleared on settle so the timer does not stay armed after the upload is done
		const timer = setTimeout(() => reject(new Error("Upload timed out")), 20_000);

		upload.once("finish", () => {
			clearTimeout(timer);
			// NOTE(review): the stored metadata uses the detected mime while this reference
			// reports the client-provided `file.type` — confirm this difference is intended.
			resolve({ type: "hash", value: sha, mime: file.type, name: file.name });
		});
		upload.once("error", (err) => {
			clearTimeout(timer);
			reject(err);
		});
	});
}

================================================
FILE: src/lib/server/findRepoRoot.ts
================================================
import { existsSync } from "fs";
import { join, dirname } from "path";

/**
 * Walks up from `startPath` and returns the first directory that contains a
 * `package.json`.
 *
 * The walk stops when `dirname` no longer changes the path, which covers "/" as well as
 * relative paths and drive roots, so the function cannot loop forever. The top-most
 * directory itself is checked too.
 *
 * Throws when no `package.json` is found.
 */
export function findRepoRoot(startPath: string): string {
	let currentPath = startPath;

	for (;;) {
		if (existsSync(join(currentPath, "package.json"))) {
			return currentPath;
		}

		const parentPath = dirname(currentPath);
		if (parentPath === currentPath) {
			// Reached the top of the hierarchy
			break;
		}
		currentPath = parentPath;
	}

	throw new Error("Could not find repository root (no package.json found)");
}

================================================
FILE: src/lib/server/generateFromDefaultEndpoint.ts
================================================
import { taskModel, models } from "$lib/server/models";
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import type { EndpointMessage } from "./endpoints/endpoints";

export async function*
generateFromDefaultEndpoint({
	messages,
	preprompt,
	generateSettings,
	modelId,
	locals,
}: {
	messages: EndpointMessage[];
	preprompt?: string;
	generateSettings?: Record;
	/** Optional: use this model instead of the default task model */
	modelId?: string;
	locals: App.Locals | undefined;
}): AsyncGenerator {
	// Streams tokens as MessageUpdateType.Stream updates and returns the final text with a
	// trailing stop sequence removed. Any failure ends the generator with an empty string;
	// the error itself is not reported from here.
	try {
		// Choose endpoint based on provided modelId, else fall back to taskModel
		const model = modelId ? (models.find((m) => m.id === modelId) ?? taskModel) : taskModel;
		const endpoint = await model.getEndpoint();

		const tokenStream = await endpoint({ messages, preprompt, generateSettings, locals });

		for await (const output of tokenStream) {
			// if not generated_text is here it means the generation is not done
			if (output.generated_text) {
				let generated_text = output.generated_text;
				for (const stop of [...(model.parameters?.stop ?? []), "<|endoftext|>"]) {
					// Skip empty stop sequences: every string ends with "" and
					// `slice(0, -0)` is `slice(0, 0)`, which would erase the whole text.
					if (stop.length > 0 && generated_text.endsWith(stop)) {
						generated_text = generated_text.slice(0, -stop.length).trimEnd();
					}
				}
				return generated_text;
			}
			yield {
				type: MessageUpdateType.Stream,
				token: output.token.text,
			};
		}
	} catch {
		return "";
	}

	return "";
}

================================================
FILE: src/lib/server/hooks/error.ts
================================================
import type { HandleServerError } from "@sveltejs/kit";
import { logger } from "$lib/server/logger";

type HandleServerErrorInput = Parameters[0];

/**
 * Server error hook.
 * Requests that matched no route get a "not found" message. Every other error is logged
 * with a freshly generated `errorId`, and a generic message plus that id is returned.
 */
export async function handleServerError({
	error,
	event,
	status,
	message,
}: HandleServerErrorInput): Promise {
	// handle 404
	if (event.route.id === null) {
		return {
			message: `Page ${event.url.pathname} not found`,
		};
	}

	const errorId = crypto.randomUUID();

	// NOTE(review): the whole `event.locals` and `event.request` objects are passed to the
	// logger; confirm the logger redacts credentials they may carry.
	logger.error({
		locals: event.locals,
		url: event.request.url,
		params: event.params,
		request: event.request,
		message,
		error,
		errorId,
		status,
		stack: error instanceof Error ? error.stack : undefined,
	});

	return {
		message: "An error occurred",
		errorId,
	};
}

================================================
FILE: src/lib/server/hooks/fetch.ts
================================================
import type { HandleFetch } from "@sveltejs/kit";
import { isHostLocalhost } from "$lib/server/isURLLocal";

type HandleFetchInput = Parameters[0];

/**
 * Fetch hook: when the target host is localhost, the cookie header of the incoming
 * request is copied onto the outgoing request; every other request is sent unchanged.
 */
export async function handleFetchRequest({
	event,
	request,
	fetch,
}: HandleFetchInput): Promise {
	if (isHostLocalhost(new URL(request.url).hostname)) {
		const cookieHeader = event.request.headers.get("cookie");
		if (cookieHeader) {
			const headers = new Headers(request.headers);
			headers.set("cookie", cookieHeader);

			return fetch(new Request(request, { headers }));
		}
	}

	return fetch(request);
}

================================================
FILE: src/lib/server/hooks/handle.ts
================================================
import type { Handle, RequestEvent } from "@sveltejs/kit";
import { collections } from "$lib/server/database";
import { base } from "$app/paths";
import { dev } from "$app/environment";
import {
	authenticateRequest,
	loginEnabled,
	refreshSessionCookie,
	triggerOauthFlow,
} from "$lib/server/auth";
import { ERROR_MESSAGES } from "$lib/stores/errors";
import { addWeeks } from "date-fns";
import { logger } from "$lib/server/logger";
import { adminTokenManager } from "$lib/server/adminToken";
import { isHostLocalhost } from "$lib/server/isURLLocal";
import { runWithRequestContext, updateRequestContext } from "$lib/server/requestContext";
import { config, ready } from "$lib/server/config";

type HandleInput = Parameters[0];

/** Returns the client address, or undefined when `getClientAddress` throws. */
function getClientAddressSafe(event: RequestEvent): string | undefined {
	try {
		return event.getClientAddress();
	} catch {
		return undefined;
	}
}

export async function handleRequest({ event, resolve }: HandleInput): Promise {
	// Generate a unique request ID for this request
	const requestId = crypto.randomUUID();

	// Run the entire request handling within the request context
	return
runWithRequestContext( async () => { await ready.then(() => { config.checkForUpdates(); }); logger.debug( { locals: event.locals, url: event.url.pathname, params: event.params, request: event.request, }, "Request received" ); function errorResponse(status: number, message: string) { const sendJson = event.request.headers.get("accept")?.includes("application/json") || event.request.headers.get("content-type")?.includes("application/json"); return new Response(sendJson ? JSON.stringify({ error: message }) : message, { status, headers: { "content-type": sendJson ? "application/json" : "text/plain", }, }); } if ( event.url.pathname.startsWith(`${base}/admin/`) || event.url.pathname === `${base}/admin` ) { const ADMIN_SECRET = config.ADMIN_API_SECRET || config.PARQUET_EXPORT_SECRET; if (!ADMIN_SECRET) { return errorResponse(500, "Admin API is not configured"); } if (event.request.headers.get("Authorization") !== `Bearer ${ADMIN_SECRET}`) { return errorResponse(401, "Unauthorized"); } } const isApi = event.url.pathname.startsWith(`${base}/api/`); const auth = await authenticateRequest( event.request.headers, event.cookies, event.url, isApi ); event.locals.sessionId = auth.sessionId; if (loginEnabled && !auth.user && !event.url.pathname.startsWith(`${base}/.well-known/`)) { if (config.AUTOMATIC_LOGIN === "true") { // AUTOMATIC_LOGIN: always redirect to OAuth flow (unless already on login or healthcheck pages) if ( !event.url.pathname.startsWith(`${base}/login`) && !event.url.pathname.startsWith(`${base}/healthcheck`) ) { // To get the same CSRF token after callback refreshSessionCookie(event.cookies, auth.secretSessionId); return await triggerOauthFlow(event); } } else { // Redirect to OAuth flow unless on the authorized pages (home, shared conversation, login, healthcheck, model thumbnails) if ( event.url.pathname !== `${base}/` && event.url.pathname !== `${base}` && !event.url.pathname.startsWith(`${base}/login`) && 
!event.url.pathname.startsWith(`${base}/login/callback`) && !event.url.pathname.startsWith(`${base}/healthcheck`) && !event.url.pathname.startsWith(`${base}/r/`) && !event.url.pathname.startsWith(`${base}/conversation/`) && !event.url.pathname.startsWith(`${base}/models/`) && !event.url.pathname.startsWith(`${base}/api`) ) { refreshSessionCookie(event.cookies, auth.secretSessionId); return triggerOauthFlow(event); } } } event.locals.user = auth.user || undefined; event.locals.token = auth.token; // Update request context with user after authentication if (auth.user?.username) { updateRequestContext({ user: auth.user.username }); } event.locals.isAdmin = event.locals.user?.isAdmin || adminTokenManager.isAdmin(event.locals.sessionId); // CSRF protection const requestContentType = event.request.headers.get("content-type")?.split(";")[0] ?? ""; /** https://developer.mozilla.org/en-US/docs/Web/HTML/Element/form#attr-enctype */ const nativeFormContentTypes = [ "multipart/form-data", "application/x-www-form-urlencoded", "text/plain", ]; if (event.request.method === "POST") { if (nativeFormContentTypes.includes(requestContentType)) { const origin = event.request.headers.get("origin"); if (!origin) { return errorResponse(403, "Non-JSON form requests need to have an origin"); } const validOrigins = [ new URL(event.request.url).host, ...(config.PUBLIC_ORIGIN ? 
[new URL(config.PUBLIC_ORIGIN).host] : []), ]; if (!validOrigins.includes(new URL(origin).host)) { return errorResponse(403, "Invalid referer for POST request"); } } } if ( event.request.method === "POST" || event.url.pathname.startsWith(`${base}/login`) || event.url.pathname.startsWith(`${base}/login/callback`) ) { // if the request is a POST request or login-related we refresh the cookie refreshSessionCookie(event.cookies, auth.secretSessionId); await collections.sessions.updateOne( { sessionId: auth.sessionId }, { $set: { updatedAt: new Date(), expiresAt: addWeeks(new Date(), 2) } } ); } if ( loginEnabled && !event.locals.user && !event.url.pathname.startsWith(`${base}/login`) && !event.url.pathname.startsWith(`${base}/admin`) && !event.url.pathname.startsWith(`${base}/settings`) && !["GET", "OPTIONS", "HEAD"].includes(event.request.method) ) { return errorResponse(401, ERROR_MESSAGES.authOnly); } let replaced = false; const response = await resolve(event, { transformPageChunk: (chunk) => { // For some reason, Sveltekit doesn't let us load env variables from .env in the app.html template if (replaced || !chunk.html.includes("%gaId%")) { return chunk.html; } replaced = true; return chunk.html.replace("%gaId%", config.PUBLIC_GOOGLE_ANALYTICS_ID); }, filterSerializedResponseHeaders: (header) => { return header.includes("content-type"); }, }); // Update request context with status code updateRequestContext({ statusCode: response.status }); // Add CSP header to control iframe embedding // Always allow huggingface.co; when ALLOW_IFRAME=true, allow all domains if (config.ALLOW_IFRAME !== "true") { response.headers.append( "Content-Security-Policy", "frame-ancestors https://huggingface.co;" ); } if ( event.url.pathname.startsWith(`${base}/login/callback`) || event.url.pathname.startsWith(`${base}/login`) ) { response.headers.append("Cache-Control", "no-store"); } if (event.url.pathname.startsWith(`${base}/api/`)) { // get origin from the request const requestOrigin = 
/**
 * Runs the server startup sequence.
 *
 * Awaits the config `ready` promise, mirrors the configured API token into
 * `process.env.HF_TOKEN` when that variable is not set, and then calls the
 * individual initialisers (exit handler, optional metrics server, migrations,
 * conversation-stats refresh, MCP server loading, AbortedGenerations, admin token display).
 */
export async function initServer(): Promise {
	// Wait for config to be fully loaded
	await ready;

	// Ensure legacy env expected by some libs: map OPENAI_API_KEY -> HF_TOKEN if absent
	// OPENAI_API_KEY wins when both are configured.
	const canonicalToken = config.OPENAI_API_KEY || config.HF_TOKEN;
	if (canonicalToken) {
		// `??=` only assigns when HF_TOKEN is null/undefined, so an existing env value is kept.
		process.env.HF_TOKEN ??= canonicalToken;
	}

	// Warn if legacy-only var is used
	if (!config.OPENAI_API_KEY && config.HF_TOKEN) {
		logger.warn(
			"HF_TOKEN is deprecated in favor of OPENAI_API_KEY. Please migrate to OPENAI_API_KEY."
		);
	}

	logger.info("Starting server...");
	initExitHandler();

	// The metrics singleton is only instantiated when metrics are enabled.
	if (config.METRICS_ENABLED === "true") {
		MetricsServer.getInstance();
	}

	// NOTE(review): the calls below are not awaited — presumably they are meant to run
	// in the background without blocking startup; confirm against their definitions.
	checkAndRunMigrations();
	refreshConversationStats();

	// Load MCP servers at startup
	loadMcpServersOnStartup();

	// Init AbortedGenerations refresh process
	AbortedGenerations.getInstance();

	adminTokenManager.displayToken();

	// EXPOSE_API no longer changes behaviour here; setting it only produces a warning.
	if (config.EXPOSE_API) {
		logger.warn(
			"The EXPOSE_API flag has been deprecated. The API is now required for chat-ui to work."
		);
	}
}
/** Promise-returning wrapper around the callback-based `dns.lookup`. */
const dnsLookup = (hostname: string): Promise<{ address: string; family: number }> =>
	new Promise((resolve, reject) => {
		dns.lookup(hostname, (err, address, family) => {
			if (err) {
				reject(err);
				return;
			}
			resolve({ address, family });
		});
	});

/**
 * Throws `Error("Invalid hostname")` unless `hostname` is at most 253 characters long and
 * every dot-separated label is 1-63 characters of `[A-Za-z0-9-]` that neither starts nor
 * ends with a hyphen.
 */
function assertValidHostname(hostname: string): void {
	const isValidLabel = (label: string): boolean =>
		label.length > 0 &&
		label.length <= 63 &&
		/^[A-Za-z0-9-]+$/.test(label) &&
		!label.startsWith("-") &&
		!label.endsWith("-");

	const isValid =
		Boolean(hostname) && hostname.length <= 253 && hostname.split(".").every(isValidLabel);
	if (!isValid) {
		throw new Error("Invalid hostname");
	}
}

/**
 * Resolves the URL's hostname and reports whether the resolved address is local.
 *
 * IPv4 addresses count as local inside 127.0.0.0/8; IPv6 addresses count as local when
 * they are loopback or link-local.
 *
 * @throws if the hostname is not an IP literal (per `isIP`) and fails hostname validation,
 *         if the DNS lookup fails, or if the lookup reports an unknown address family.
 */
export async function isURLLocal(URL: URL): Promise<boolean> {
	const host = URL.hostname;
	// Anything `isIP` does not recognise must look like a well-formed DNS name.
	if (isIP(host) === 0) {
		assertValidHostname(host);
	}

	const resolved = await dnsLookup(host);
	switch (resolved.family) {
		case 4: {
			const v4 = new Address4(resolved.address);
			return v4.isInSubnet(new Address4("127.0.0.0/8"));
		}
		case 6: {
			const v6 = new Address6(resolved.address);
			return v6.isLoopback() || v6.isInSubnet(new Address6("::1/128")) || v6.isLinkLocal();
		}
		default:
			throw Error("Unknown IP family");
	}
}

/**
 * String variant of `isURLLocal`.
 *
 * Returns `true` synchronously when `url` cannot be parsed (unparseable input is treated
 * as local); otherwise returns the promise produced by `isURLLocal`.
 */
export function isURLStringLocal(url: string) {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		// assume local if URL parsing fails
		return true;
	}
	return isURLLocal(parsed);
}

/**
 * Purely syntactic localhost check (no DNS): matches `localhost`, `*.localhost`,
 * the IPv6 loopback literal with or without brackets, and IP literals starting with `127.`.
 */
export function isHostLocalhost(host: string): boolean {
	const isLoopbackV4Literal = host.startsWith("127.") && isIP(host) !== 0;
	return (
		host === "localhost" ||
		host === "::1" ||
		host === "[::1]" ||
		isLoopbackV4Literal ||
		host.endsWith(".localhost")
	);
}
// Connected MCP clients, keyed by `keyOf(server)`.
const pool = new Map<string, Client>();

/**
 * Builds the pool key for a server: its URL followed by its headers, sorted by header
 * name so that header order does not produce distinct keys.
 */
function keyOf(server: McpServerConfig) {
	const headerPairs = Object.entries(server.headers ?? {});
	headerPairs.sort(([left], [right]) => left.localeCompare(right));
	const serializedHeaders = headerPairs.map(([name, value]) => `${name}:${value}`).join("|\u0000|");
	return `${server.url}|${serializedHeaders}`;
}

/**
 * Returns the pooled client for `server`, connecting a new one when none is pooled.
 *
 * A new client first tries the streamable HTTP transport and falls back to SSE if that
 * fails. When both fail the client is closed and an error is thrown; if the HTTP attempt
 * produced an error, the thrown message reports both failures.
 */
export async function getClient(server: McpServerConfig, signal?: AbortSignal): Promise<Client> {
	const key = keyOf(server);
	const pooled = pool.get(key);
	if (pooled) {
		return pooled;
	}

	const client = new Client({ name: "chat-ui-mcp", version: "0.1.0" });
	const url = new URL(server.url);
	const requestInit: RequestInit = { headers: server.headers, signal };
	const describe = (e: unknown): string => String(e instanceof Error ? e.message : e);

	// Kept so the original HTTP failure is still surfaced if the SSE fallback fails too.
	let httpError: unknown;
	try {
		try {
			await client.connect(new StreamableHTTPClientTransport(url, { requestInit }));
		} catch (httpErr) {
			httpError = httpErr;
			await client.connect(new SSEClientTransport(url, { requestInit }));
		}
	} catch (err) {
		try {
			await client.close?.();
		} catch {}

		if (!httpError) {
			throw err;
		}
		throw new Error(
			`HTTP transport failed: ${describe(httpError)}; SSE fallback failed: ${describe(err)}`,
			{ cause: err instanceof Error ? err : undefined }
		);
	}

	pool.set(key, client);
	return client;
}

/** Closes every pooled client (ignoring close errors) and removes it from the pool. */
export async function drainPool() {
	for (const [key, client] of pool) {
		try {
			await client.close?.();
		} catch {
		} finally {
			pool.delete(key);
		}
	}
}

/**
 * Removes the client pooled for `server`, if any, and returns it without closing it.
 */
export function evictFromPool(server: McpServerConfig): Client | undefined {
	const key = keyOf(server);
	const evicted = pool.get(key);
	if (!evicted) {
		return evicted;
	}
	pool.delete(key);
	return evicted;
}
/** Connection details for one MCP server. */
export interface McpServerConfig {
	name: string;
	url: string;
	headers?: Record<string, string>;
}

const DEFAULT_TIMEOUT_MS = 120_000;

/**
 * Returns the tool-call timeout from `config.MCP_TOOL_TIMEOUT_MS` when it parses to a
 * positive integer, otherwise the 120 s default.
 */
export function getMcpToolTimeoutMs(): number {
	const raw = config.MCP_TOOL_TIMEOUT_MS;
	if (!raw) {
		return DEFAULT_TIMEOUT_MS;
	}
	const parsed = Number.parseInt(raw, 10);
	// NaN fails the comparison, so unparseable values fall through to the default.
	return parsed > 0 ? parsed : DEFAULT_TIMEOUT_MS;
}

export type McpToolTextResponse = {
	text: string;
	/** If the server returned structuredContent, include it raw */
	structured?: unknown;
	/** Raw content blocks returned by the server, if any */
	content?: unknown[];
};

export type McpToolProgress = {
	progress: number;
	total?: number;
	message?: string;
};

/**
 * Calls `tool` on `server` and flattens the result.
 *
 * Uses the supplied `client` when given, otherwise a pooled one. If the call fails with a
 * connection-closed error, the pooled client is evicted and closed and the call is retried
 * once on a fresh client; any other error is rethrown unchanged.
 *
 * @param args tool arguments; anything other than a non-array object is sent as `undefined`
 * @returns the `text` blocks joined with newlines, the raw `structuredContent`, and the raw
 *          content array (`undefined` when the response has no content array)
 */
export async function callMcpTool(
	server: McpServerConfig,
	tool: string,
	args: unknown = {},
	{
		timeoutMs = DEFAULT_TIMEOUT_MS,
		signal,
		client,
		onProgress,
	}: {
		timeoutMs?: number;
		signal?: AbortSignal;
		client?: Client;
		onProgress?: (progress: McpToolProgress) => void;
	} = {}
): Promise<McpToolTextResponse> {
	const isPlainObject = typeof args === "object" && args !== null && !Array.isArray(args);
	const normalizedArgs = isPlainObject ? (args as Record<string, unknown>) : undefined;

	// Get a (possibly pooled) client. A per-call timeout is still enforced below.
	let activeClient = client ?? (await getClient(server, signal));

	const callToolOptions = {
		signal,
		timeout: timeoutMs,
		// Forward only the known progress fields to the caller's callback.
		onprogress: ({ progress, total, message }: McpToolProgress) => {
			onProgress?.({ progress, total, message });
		},
		// Progress notifications from long-running tools keep extending the timeout.
		resetTimeoutOnProgress: true,
	};

	const invoke = (target: Client) =>
		target.callTool({ name: tool, arguments: normalizedArgs }, undefined, callToolOptions);

	let response;
	try {
		response = await invoke(activeClient);
	} catch (err) {
		if (!isConnectionClosedError(err)) {
			throw err;
		}
		// Evict the stale client and close it without waiting for the close to finish.
		const stale = evictFromPool(server);
		stale?.close?.().catch(() => {});

		// Retry once with a fresh client.
		activeClient = await getClient(server, signal);
		response = await invoke(activeClient);
	}

	const rawContent: unknown = response?.content;
	const contentBlocks = Array.isArray(rawContent) ? (rawContent as unknown[]) : undefined;

	const textParts: string[] = [];
	for (const part of contentBlocks ?? []) {
		if (typeof part !== "object" || part === null) {
			continue;
		}
		const block = part as Record<string, unknown>;
		const kind = block["type"];
		const value = block["text"];
		if (kind === "text" && typeof value === "string") {
			textParts.push(value);
		}
	}

	const structured = (response as unknown as { structuredContent?: unknown })?.structuredContent;
	return { text: textParts.join("\n"), structured, content: contentBlocks };
}
/**
 * Parses `raw`, replaces the cached server list, remembers `raw` for change detection,
 * clears the MCP tools cache and logs the loaded server names.
 */
function setServers(raw: string) {
	cachedServers = parseServers(raw);
	cachedRaw = raw;
	resetMcpToolsCache();

	const serverCount = cachedServers.length;
	logger.debug({ count: serverCount }, "[mcp] loaded server configuration");

	const serverNames = cachedServers.map((server) => server.name).join(", ");
	console.log(`[MCP] Loaded ${serverCount} server(s):`, serverNames || "none");
}

/** Loads the server list from `config.MCP_SERVERS` (default `"[]"`) and returns it. */
export function loadMcpServersOnStartup(): McpServerConfig[] {
	setServers(config.MCP_SERVERS || "[]");
	return cachedServers;
}

/**
 * Reloads the server list when nothing has been loaded yet or when the raw
 * `config.MCP_SERVERS` value differs from the one last loaded.
 */
export function refreshMcpServersIfChanged(): void {
	const currentRaw = config.MCP_SERVERS || "[]";
	const neverLoaded = cachedRaw === null;
	if (neverLoaded || currentRaw !== cachedRaw) {
		setServers(currentRaw);
	}
}

/** Returns the cached server list, loading it first if that has not happened yet. */
export function getMcpServers(): McpServerConfig[] {
	const neverLoaded = cachedRaw === null;
	if (neverLoaded) {
		loadMcpServersOnStartup();
	}
	return cachedServers;
}
/**
 * Normalizes a tool name to the `^[a-zA-Z0-9_-]{1,64}$` shape most providers enforce:
 * every disallowed character (including ".") becomes "_" and the result is cut to 64 chars.
 */
function sanitizeName(name: string) {
	return name.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
}

/**
 * Builds a cache key for a set of servers that does not depend on the order of the
 * servers (sorted by name, then URL) or of each server's headers (sorted by header name).
 */
function buildCacheKey(servers: McpServerConfig[]): string {
	const normalized = servers
		.map((server) => ({
			name: server.name,
			url: server.url,
			headers: server.headers
				? Object.entries(server.headers).sort(([a], [b]) => a.localeCompare(b))
				: [],
		}))
		.sort((a, b) => a.name.localeCompare(b.name) || a.url.localeCompare(b.url));
	return JSON.stringify(normalized);
}

type ListedTool = {
	name?: string;
	inputSchema?: Record<string, unknown>;
	description?: string;
	annotations?: { title?: string };
};

/**
 * Connects a short-lived client to `server` (streamable HTTP first, SSE as fallback),
 * lists its tools and always closes the client again.
 *
 * @throws whatever the SSE connection attempt or `listTools` throws
 */
async function listServerTools(
	server: McpServerConfig,
	opts: { signal?: AbortSignal } = {}
): Promise<ListedTool[]> {
	const url = new URL(server.url);
	const client = new Client({ name: "chat-ui-mcp", version: "0.1.0" });
	const requestInit = { headers: server.headers, signal: opts.signal };
	try {
		try {
			await client.connect(new StreamableHTTPClientTransport(url, { requestInit }));
		} catch {
			await client.connect(new SSEClientTransport(url, { requestInit }));
		}

		const response = await client.listTools({});
		const tools = Array.isArray(response?.tools) ? (response.tools as ListedTool[]) : [];
		try {
			logger.debug(
				{
					server: server.name,
					url: server.url,
					count: tools.length,
					toolNames: tools.map((t) => t?.name).filter(Boolean),
				},
				"[mcp] listed tools from server"
			);
		} catch {
			// logging must never fail the listing
		}
		return tools;
	} finally {
		try {
			await client.close?.();
		} catch {
			// ignore close errors
		}
	}
}

/**
 * Picks a function name for `toolName` that is not yet a key of `taken`.
 *
 * Preference order: the sanitized tool name, then `<tool>_<server>`, then
 * `<tool>_<server>_<n>` for n = 2, 3, ... The base is shortened as needed so the numeric
 * suffix always survives the 64-character limit; without that, truncation could cut the
 * suffix off and hand back a name that is already in use.
 *
 * The `in` check also treats inherited object keys (e.g. `__proto__`, `constructor`) as
 * taken, so such names are never used as plain keys of the mapping object.
 */
function pickUniqueFnName(
	toolName: string,
	serverName: string,
	taken: Record<string, unknown>
): string {
	const plain = sanitizeName(toolName);
	if (!(plain in taken)) return plain;

	const candidate = `${plain}_${sanitizeName(serverName)}`.slice(0, 64);
	if (!(candidate in taken)) return candidate;

	// Each n yields a distinct string and `taken` is finite, so this loop terminates.
	for (let n = 2; ; n += 1) {
		const suffix = `_${n}`;
		const next = `${candidate.slice(0, 64 - suffix.length)}${suffix}`;
		if (!(next in taken)) return next;
	}
}

/**
 * Lists the tools of all `servers` in parallel and converts them to OpenAI function tools.
 *
 * Results are cached per server set for `ttlMs`. Servers whose listing fails are skipped.
 * Every emitted function name is unique, and `mapping` records which server and original
 * tool name each function name routes to.
 */
export async function getOpenAiToolsForMcp(
	servers: McpServerConfig[],
	{ ttlMs = DEFAULT_TTL_MS, signal }: { ttlMs?: number; signal?: AbortSignal } = {}
): Promise<{ tools: OpenAiTool[]; mapping: Record<string, McpToolMapping> }> {
	const now = Date.now();
	const cacheKey = buildCacheKey(servers);
	const cached = cache.get(cacheKey);
	if (cached && now - cached.fetchedAt < cached.ttlMs) {
		return { tools: cached.tools, mapping: cached.mapping };
	}

	const tools: OpenAiTool[] = [];
	const mapping: Record<string, McpToolMapping> = {};

	// Fetch tools in parallel; tolerate individual failures
	const results = await Promise.allSettled(
		servers.map((server) => listServerTools(server, { signal }))
	);

	results.forEach((result, index) => {
		if (result.status !== "fulfilled") {
			// ignore failure for this server
			return;
		}
		const server = servers[index];
		for (const tool of result.value) {
			if (typeof tool.name !== "string" || tool.name.trim().length === 0) {
				continue;
			}

			const parameters =
				tool.inputSchema && typeof tool.inputSchema === "object" ? tool.inputSchema : undefined;
			const description = tool.description ?? tool.annotations?.title;

			// Prefer the plain tool name; on conflict, suffix with the server name.
			const fnName = pickUniqueFnName(tool.name, server.name, mapping);

			// The definition and its routing entry are always added together, so a
			// function name can never point at a different server's tool.
			tools.push({ type: "function", function: { name: fnName, description, parameters } });
			mapping[fnName] = { fnName, server: server.name, tool: tool.name };
		}
	});

	cache.set(cacheKey, { fetchedAt: now, ttlMs, tools, mapping });
	return { tools, mapping };
}

/** Drops every cached tool listing. */
export function resetMcpToolsCache() {
	cache.clear();
}
/**
 * Produce the metrics payload served by the standalone metrics endpoint.
 *
 * @returns the output of `this.register.metrics()`, or an empty string when
 *          metrics are disabled
 */
public async render(): Promise<string> {
	return this.enabled ? this.register.metrics() : "";
}
/**
 * Start the standalone HTTP server that exposes the metrics payload.
 *
 * The port comes from `METRICS_PORT` (default 5565); an invalid value is
 * logged and no server is started. Only GET is accepted. Server-level errors
 * such as the port already being in use are logged instead of surfacing as an
 * unhandled `'error'` event. A shutdown hook closes the server on exit.
 */
private startStandaloneServer() {
	const port = Number(config.METRICS_PORT || "5565");
	if (!Number.isInteger(port) || port < 0 || port > 65535) {
		logger.warn(`Invalid METRICS_PORT value: ${config.METRICS_PORT}`);
		return;
	}

	const server = createServer(async (req, res) => {
		if (req.method !== "GET") {
			res.statusCode = 405;
			// A 405 response should tell the client which methods are allowed.
			res.setHeader("Allow", "GET");
			res.end("Method Not Allowed");
			return;
		}

		try {
			const payload = await this.render();
			res.setHeader("Content-Type", "text/plain; version=0.0.4");
			res.end(payload);
		} catch (error) {
			logger.error(error, "Failed to render metrics");
			res.statusCode = 500;
			res.end("Failed to render metrics");
		}
	});
	this.httpServer = server;

	// Without a listener, a failed listen() (e.g. EADDRINUSE) is emitted as an
	// unhandled 'error' event and takes the whole process down.
	server.on("error", (error) => {
		logger.error(error, "Metrics server error");
	});

	server.listen(port, () => {
		logger.info(`Metrics server listening on port ${port}`);
	});

	onExit(async () => {
		const active = this.httpServer;
		if (!active) return;

		// Nothing to close if listen() never succeeded.
		if (!active.listening) {
			this.httpServer = undefined;
			return;
		}

		logger.info("Shutting down metrics server...");
		await new Promise<void>((resolve, reject) => {
			active.close((err) => {
				if (err) {
					reject(err);
					return;
				}
				resolve();
			});
		}).catch((error) => logger.error(error, "Failed to close metrics server"));
		this.httpServer = undefined;
	});
}
raw.slice(1, -1) : raw; return unquoted || fallback; }; const modelConfig = z.object({ /** Used as an identifier in DB */ id: z.string().optional(), /** Used to link to the model page, and for inference */ name: z.string().default(""), displayName: z.string().min(1).optional(), description: z.string().min(1).optional(), logoUrl: z.string().url().optional(), websiteUrl: z.string().url().optional(), modelUrl: z.string().url().optional(), tokenizer: z.never().optional(), datasetName: z.string().min(1).optional(), datasetUrl: z.string().url().optional(), preprompt: z.string().default(""), prepromptUrl: z.string().url().optional(), chatPromptTemplate: z.never().optional(), promptExamples: z .array( z.object({ title: z.string().min(1), prompt: z.string().min(1), }) ) .optional(), endpoints: z.array(endpointSchema).optional(), providers: z.array(z.object({ supports_tools: z.boolean().optional() }).passthrough()).optional(), parameters: z .object({ temperature: z.number().min(0).max(2).optional(), truncate: z.number().int().positive().optional(), max_tokens: z.number().int().positive().optional(), stop: z.array(z.string()).optional(), top_p: z.number().positive().optional(), top_k: z.number().positive().optional(), frequency_penalty: z.number().min(-2).max(2).optional(), presence_penalty: z.number().min(-2).max(2).optional(), }) .passthrough() .optional(), multimodal: z.boolean().default(false), multimodalAcceptedMimetypes: z.array(z.string()).optional(), // Aggregated tool-calling capability across providers (HF router) supportsTools: z.boolean().default(false), unlisted: z.boolean().default(false), embeddingModel: z.never().optional(), /** Used to enable/disable system prompt usage */ systemRoleSupported: z.boolean().default(true), }); type ModelConfig = z.infer; const overrideEntrySchema = modelConfig .partial() .extend({ id: z.string().optional(), name: z.string().optional(), }) .refine((value) => Boolean((value.id ?? 
/**
 * Turn a validated model config into the runtime model object.
 *
 * Fills in `id` and `displayName` from `name` when they are missing, attaches
 * the chat prompt renderer, mirrors `parameters.stop` into
 * `parameters.stop_sequences`, and resolves the preprompt.
 *
 * When `prepromptUrl` is set, the preprompt is downloaded from it. A non-2xx
 * response is logged and the configured `preprompt` is used instead, so an
 * error page is never used as the system prompt. Network failures still
 * reject, as before.
 *
 * @param m - model configuration
 * @returns the model config extended with the derived fields
 */
const processModel = async (m: ModelConfig) => {
	let preprompt = m.preprompt;

	if (m.prepromptUrl) {
		const response = await fetch(m.prepromptUrl);
		if (response.ok) {
			preprompt = await response.text();
		} else {
			logger.warn(
				{ model: m.id || m.name, prepromptUrl: m.prepromptUrl, status: response.status },
				"[models] Failed to fetch preprompt; using configured preprompt instead"
			);
		}
	}

	return {
		...m,
		// getChatPromptRender is synchronous, so there is nothing to await here.
		chatPromptRender: getChatPromptRender(m),
		id: m.id || m.name,
		displayName: m.displayName || m.name,
		preprompt,
		parameters: { ...m.parameters, stop_sequences: m.parameters?.stop },
		unlisted: m.unlisted ?? false,
	};
};
/**
 * Read per-model overrides from the `MODELS` config value.
 *
 * The value is parsed as JSON5 (after `sanitizeJSONEnv` clean-up) and
 * validated against `overrideEntrySchema`. A missing or blank value yields no
 * overrides; a value that fails to parse or validate is logged and also
 * yields no overrides.
 *
 * @returns the validated override entries, possibly empty
 */
const getModelOverrides = (): ModelOverride[] => {
	const configured = Reflect.get(config, "MODELS") as string | undefined;
	const envText = configured ?? "";
	if (envText.trim() === "") {
		return [];
	}

	let overrides: ModelOverride[] = [];
	try {
		const decoded = JSON5.parse(sanitizeJSONEnv(envText, "[]"));
		overrides = z.array(overrideEntrySchema).parse(decoded);
	} catch (error) {
		logger.error(error, "[models] Failed to parse MODELS overrides");
	}
	return overrides;
};
/**
 * Install a freshly built model list as the module's current state and
 * report what changed relative to the previous list.
 *
 * Replaces `models`, `defaultModel` (first entry), `taskModel`,
 * `validModelIdSchema` and the refresh timestamps, then logs and returns a
 * summary of added, removed and changed model ids. A model counts as changed
 * when its id existed before and its `signatureForModel` value differs.
 *
 * @param newModels - the models to install; must not be empty
 * @param startedAt - `Date.now()` value taken when the rebuild started
 * @returns summary of the refresh
 * @throws Error when `newModels` is empty
 */
const applyModelState = (newModels: ProcessedModel[], startedAt: number): ModelsRefreshSummary => {
	if (newModels.length === 0) {
		throw new Error("Failed to load any models from upstream");
	}

	// Snapshot the outgoing list before the module-level bindings are replaced.
	const previousIds = new Set(models.map((m) => m.id));
	const previousSignatures = new Map(
		models.map((m): [string, string] => [m.id, signatureForModel(m)])
	);
	// Set lookup keeps the "removed" computation linear instead of scanning
	// the new list once per previous id.
	const nextIds = new Set(newModels.map((m) => m.id));

	const refreshedAt = new Date();
	const durationMs = Date.now() - startedAt;

	models = newModels;
	defaultModel = models[0];
	taskModel = resolveTaskModel(models);
	validModelIdSchema = createValidModelIdSchema(models);
	lastModelRefresh = refreshedAt;
	lastModelRefreshDurationMs = durationMs;

	const added = newModels.map((m) => m.id).filter((id) => !previousIds.has(id));
	const removed = Array.from(previousIds).filter((id) => !nextIds.has(id));
	const changed = newModels
		.filter((model) => {
			const previousSignature = previousSignatures.get(model.id);
			return previousSignature !== undefined && previousSignature !== signatureForModel(model);
		})
		.map((model) => model.id);

	const summary: ModelsRefreshSummary = {
		refreshedAt,
		durationMs,
		added,
		removed,
		changed,
		total: models.length,
	};

	lastModelRefreshSummary = summary;

	logger.info(
		{
			total: summary.total,
			added: summary.added,
			removed: summary.removed,
			changed: summary.changed,
			durationMs: summary.durationMs,
		},
		"[models] Model cache refreshed"
	);

	return summary;
};
[]).map((modality) => modality.toLowerCase() ); const supportsImageInput = inputModalities.includes("image") || inputModalities.includes("vision"); // If any provider supports tools, consider the model as supporting tools const supportsTools = Boolean((m.providers ?? []).some((p) => p?.supports_tools === true)); return { id: m.id, name: m.id, displayName: m.id, description: m.description, logoUrl, providers: m.providers, multimodal: supportsImageInput, multimodalAcceptedMimetypes: supportsImageInput ? ["image/*"] : undefined, supportsTools, endpoints: [ { type: "openai" as const, baseURL, // apiKey will be taken from OPENAI_API_KEY or HF_TOKEN automatically }, ], } as ModelConfig; }) as ModelConfig[]; const overrides = getModelOverrides(); if (overrides.length) { const overrideMap = new Map(); for (const override of overrides) { for (const key of [override.id, override.name]) { const trimmed = key?.trim(); if (trimmed) overrideMap.set(trimmed, override); } } modelsRaw = modelsRaw.map((model) => { const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? ""); if (!override) return model; const { id, name, ...rest } = override; void id; void name; return { ...model, ...rest, }; }); } const builtModels = await Promise.all( modelsRaw.map((e) => processModel(e) .then(addEndpoint) .then(async (m) => ({ ...m, hasInferenceAPI: inferenceApiIds.includes(m.id ?? 
m.name), // router decoration added later isRouter: false as boolean, })) ) ); const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim(); const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni"; const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim(); const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni"; const routerMultimodalEnabled = (config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true"; const routerToolsEnabled = (config.LLM_ROUTER_ENABLE_TOOLS || "").toLowerCase() === "true"; let decorated = builtModels as ProcessedModel[]; if (archBase) { // Build a minimal model config for the alias const aliasRaw = { id: routerAliasId, name: routerAliasId, displayName: routerLabel, description: "Automatically routes your messages to the best model for your request.", logoUrl: routerLogo || undefined, preprompt: "", endpoints: [ { type: "openai" as const, baseURL: openaiBaseUrl, }, ], // Keep the alias visible unlisted: false, } as ModelConfig; if (routerMultimodalEnabled) { aliasRaw.multimodal = true; aliasRaw.multimodalAcceptedMimetypes = ["image/*"]; } if (routerToolsEnabled) { aliasRaw.supportsTools = true; } const aliasBase = await processModel(aliasRaw); // Create a self-referential ProcessedModel for the router endpoint const aliasModel: ProcessedModel = { ...aliasBase, isRouter: true, hasInferenceAPI: false, // getEndpoint uses the router wrapper regardless of the endpoints array getEndpoint: async (): Promise => makeRouterEndpoint(aliasModel), } as ProcessedModel; // Put alias first decorated = [aliasModel, ...decorated]; } return decorated; } catch (e) { logger.error(e, "Failed to load models from OpenAI base URL"); throw e; } }; const rebuildModels = async (): Promise => { const startedAt = Date.now(); const newModels = await buildModels(); return applyModelState(newModels, startedAt); }; await rebuildModels(); export const refreshModels = async (): Promise => { if 
/**
 * Build a zod enum that accepts exactly the ids of the given models.
 *
 * @param _models - models whose ids are valid; must contain at least one
 * @returns a zod enum schema over the model ids
 * @throws Error when `_models` is empty
 */
export const validateModel = (_models: BackendModel[]) => {
	const [firstId, ...otherIds] = _models.map((m) => m.id);
	if (firstId === undefined) {
		throw new Error("validateModel requires at least one model");
	}
	// z.enum needs a non-empty tuple, hence the explicit first element.
	return z.enum([firstId, ...otherIds]);
};
/**
 * Trim a message by keeping its start and end and replacing the middle with
 * a single ellipsis character.
 *
 * About 60% of the kept characters come from the start and the rest from the
 * end. The result is never longer than `content`.
 *
 * @param content - the message content to trim
 * @param maxLength - maximum total length, including the ellipsis. Fractions
 *   are rounded down, values below zero are treated as zero, and `NaN` (for
 *   example from parsing a malformed setting) means "no limit".
 * @returns `content` unchanged when it fits, otherwise the trimmed text
 */
function trimMiddle(content: string, maxLength: number): string {
	// A NaN limit would otherwise slip past every comparison below and produce
	// "…" + the full content, i.e. something longer than the input.
	if (Number.isNaN(maxLength)) return content;

	// Clamp so that a negative limit cannot turn into slice(0, -n), which
	// would keep almost the entire string.
	const limit = Math.max(0, Math.floor(maxLength));
	if (content.length <= limit) return content;

	const indicator = "…";
	const availableLength = limit - indicator.length;

	if (availableLength <= 0) {
		// No room even for the indicator: hard truncate.
		return content.slice(0, limit);
	}

	// Reserve more space for the start (typically contains context).
	const startLength = Math.ceil(availableLength * 0.6);
	const endLength = availableLength - startLength;

	// slice(-0) returns the entire string, so the end part needs a positive length.
	if (endLength <= 0) {
		return content.slice(0, availableLength) + indicator;
	}

	return content.slice(0, startLength) + indicator + content.slice(-endLength);
}
/**
 * Return the last `n` entries of an array.
 *
 * @param arr - source array; anything that is not an array yields `[]`
 * @param n - number of trailing entries to keep (defaults to DEFAULT_LAST_TURNS);
 *   zero or a negative number yields `[]`
 * @returns a new array with at most `n` entries from the end of `arr`
 */
function lastNTurns<T>(arr: T[], n = DEFAULT_LAST_TURNS): T[] {
	if (!Array.isArray(arr)) return [] as T[];
	// slice(-0) returns the whole array and slice(-(-k)) drops entries from the
	// front instead, so non-positive counts are handled explicitly.
	if (n <= 0) return [] as T[];
	return arr.slice(-n);
}
/**
 * Extract the route name from the router model's reply.
 *
 * Tries, in order: the reply as strict JSON, a `"route": "<name>"` pair found
 * anywhere in the text (single or double quotes), and finally the reply as
 * JSON after converting single quotes to double quotes.
 *
 * @param text - raw reply text
 * @returns the trimmed, non-empty route name, or `undefined` when none is found
 */
function parseRouteName(text: string): string | undefined {
	if (!text) return undefined;

	// Parse `candidate` as JSON and return its non-empty string `route`, if any.
	const routeFromJson = (candidate: string): string | undefined => {
		try {
			const parsed = JSON.parse(candidate);
			const route = typeof parsed?.route === "string" ? parsed.route.trim() : "";
			return route || undefined;
		} catch {
			return undefined;
		}
	};

	const strict = routeFromJson(text);
	if (strict) return strict;

	const match = text.match(/["']route["']\s*:\s*["']([^"']+)["']/);
	// Trim before testing so a whitespace-only capture is not returned as "".
	const embedded = match?.[1]?.trim();
	if (embedded) return embedded;

	return routeFromJson(text.replace(/'/g, '"'));
}
{ "X-HF-Bill-To": locals.billingOrganization } : {}), }; const body = { model: archModel, messages: [{ role: "user", content: prompt }], temperature: 0, max_tokens: 16, stream: false, }; const ctrl = new AbortController(); const timeoutMs = Number(config.LLM_ROUTER_ARCH_TIMEOUT_MS || 10000); const to = setTimeout(() => ctrl.abort(), timeoutMs); try { const resp = await fetch(`${baseURL}/chat/completions`, { method: "POST", headers, body: JSON.stringify(body), signal: ctrl.signal, }); clearTimeout(to); if (!resp.ok) { // Extract error message from response let errorMessage = `arch-router ${resp.status}`; try { const errorData = await resp.json(); // Try to extract message from OpenAI-style error format if (errorData.error?.message) { errorMessage = errorData.error.message; } else if (errorData.message) { errorMessage = errorData.message; } } catch { // If JSON parsing fails, use status text errorMessage = resp.statusText || errorMessage; } logger.warn( { status: resp.status, error: errorMessage, traceId }, "[arch] router returned error" ); return { routeName: "arch_router_failure", error: { message: errorMessage, statusCode: resp.status, }, }; } const data: { choices: { message: { content: string } }[] } = await resp.json(); const text = (data?.choices?.[0]?.message?.content ?? "").toString().trim(); const raw = parseRouteName(text); const other = config.LLM_ROUTER_OTHER_ROUTE || "casual_conversation"; const chosen = raw === "other" ? other : raw || "casual_conversation"; const exists = routes.some((r) => r.name === chosen); return { routeName: exists ? 
// Cache the models *module* rather than its `models` array. The models module
// replaces its exported `models` binding on every refresh, so a cached array
// would keep pointing at the list from the first request. Reading `.models`
// from the module object always yields the current list, and the dynamic
// import still runs only once.
let cachedModelsModule: { models: ProcessedModel[] } | undefined;

/**
 * Return the current list of processed models.
 *
 * The models module is loaded lazily with a dynamic import on first use.
 *
 * @returns the model list currently exported by the models module
 */
async function getModels(): Promise<ProcessedModel[]> {
	if (!cachedModelsModule) {
		cachedModelsModule = (await import("../models")) as { models: ProcessedModel[] };
	}
	return cachedModelsModule.models;
}
/**
 * Tell whether an upstream status code is a policy/entitlement error that
 * should be shown to the user immediately, as opposed to a transient error
 * that should trigger fallback.
 *
 * @param statusCode - HTTP status code of the upstream failure, if known
 * @returns `true` for 400, 401, 402 and 403; `false` for anything else,
 *          including a missing status code
 */
function isPolicyError(statusCode?: number): boolean {
	switch (statusCode) {
		case 400: // Bad Request
		case 401: // Unauthorized
		case 402: // Payment Required
		case 403: // Forbidden
			return true;
		default:
			return false;
	}
}
*/
export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<Endpoint> {
	return async function routerEndpoint(params: EndpointParameters) {
		const routes = await getRoutes();
		// Reasoning blocks are stripped only from the copy used for routing decisions;
		// the candidate endpoints below still receive the original `params`.
		const sanitizedMessages = params.messages.map(stripReasoningFromMessage);
		const routerMultimodalEnabled =
			(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
		const routerToolsEnabled = isRouterToolsBypassEnabled();
		const hasImageInput = sanitizedMessages.some((message) =>
			(message.files ?? []).some(
				(file) => typeof file?.mime === "string" && file.mime.startsWith("image/")
			)
		);
		// Tools are considered "active" if the client indicated any enabled MCP server
		const hasToolsActive = hasActiveToolsSelection(params.locals);

		// Helper to create an OpenAI endpoint for a specific candidate model id
		async function createCandidateEndpoint(candidateModelId: string): Promise<Endpoint> {
			// Try to use the real candidate model config if present in chat-ui's model list
			let modelForCall: ProcessedModel | undefined;
			try {
				const all = await getModels();
				modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
			} catch (e) {
				logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
			}

			if (!modelForCall) {
				// Fallback: clone router model with candidate id
				modelForCall = {
					...routerModel,
					id: candidateModelId,
					name: candidateModelId,
					displayName: candidateModelId,
				} as ProcessedModel;
			}

			return endpoints.openai({
				type: "openai",
				// Drop a single trailing slash from the configured base URL
				baseURL: (config.OPENAI_BASE_URL || "https://router.huggingface.co/v1").replace(/\/$/, ""),
				apiKey: getApiToken(params.locals),
				model: modelForCall,
				// Ensure streaming path is used
				streamingSupported: true,
			});
		}

		// Yield router metadata for immediate UI display, using the actual candidate
		async function* metadataThenStream(
			gen: AsyncGenerator<TextGenerationStreamOutputSimplified>,
			actualModel: string,
			selectedRoute: string
		) {
			// Synthetic "special" first event that only carries the routing decision
			yield {
				token: { id: 0, text: "", special: true, logprob: 0 },
				generated_text: null,
				details: null,
				routerMetadata: { route: selectedRoute, model: actualModel },
			};
			for await (const ev of gen) yield ev;
		}

		// --- Bypass 1: image input goes straight to the configured multimodal model ---
		if (routerMultimodalEnabled && hasImageInput) {
			let multimodalCandidate: string | undefined;
			try {
				const all = await getModels();
				multimodalCandidate = getConfiguredMultimodalModelId(all);
			} catch (e) {
				logger.warn({ err: String(e) }, "[router] failed to load models for multimodal lookup");
			}
			if (!multimodalCandidate) {
				throw new Error(
					"Router multimodal is enabled but LLM_ROUTER_MULTIMODAL_MODEL is not correctly configured. Remove the image or configure a multimodal model via LLM_ROUTER_MULTIMODAL_MODEL."
				);
			}

			try {
				logger.info(
					{ route: ROUTER_MULTIMODAL_ROUTE, model: multimodalCandidate },
					"[router] multimodal input detected; bypassing Arch selection"
				);
				const ep = await createCandidateEndpoint(multimodalCandidate);
				const gen = await ep({ ...params });
				return metadataThenStream(gen, multimodalCandidate, ROUTER_MULTIMODAL_ROUTE);
			} catch (e) {
				const { message, statusCode } = extractUpstreamError(e);
				logger.error(
					{
						route: ROUTER_MULTIMODAL_ROUTE,
						model: multimodalCandidate,
						err: message,
						...(statusCode && { status: statusCode }),
					},
					"[router] multimodal fallback failed"
				);
				throw statusCode ? new HTTPError(message, statusCode) : new Error(message);
			}
		}

		// Resolve the configured tools model; a failure to load models is logged and
		// reported as "no model" so routing can continue.
		async function findToolsCandidateModel(): Promise<ProcessedModel | undefined> {
			try {
				const all = await getModels();
				return pickToolsCapableModel(all);
			} catch (e) {
				logger.warn({ err: String(e) }, "[router] failed to load models for tools lookup");
				return undefined;
			}
		}

		// --- Bypass 2: active tools go straight to the configured tools model ---
		if (routerToolsEnabled && hasToolsActive) {
			const toolsModel = await findToolsCandidateModel();
			const toolsCandidate = toolsModel?.id ?? toolsModel?.name;
			if (!toolsCandidate) {
				// No tool-capable model found — continue with normal routing instead of hard failing
			} else {
				try {
					logger.info(
						{ route: ROUTER_TOOLS_ROUTE, model: toolsCandidate },
						"[router] tools active; bypassing Arch selection"
					);
					const ep = await createCandidateEndpoint(toolsCandidate);
					const gen = await ep({ ...params });
					return metadataThenStream(gen, toolsCandidate, ROUTER_TOOLS_ROUTE);
				} catch (e) {
					const { message, statusCode } = extractUpstreamError(e);
					const logData = {
						route: ROUTER_TOOLS_ROUTE,
						model: toolsCandidate,
						err: message,
						...(statusCode && { status: statusCode }),
					};
					if (statusCode === 402) {
						logger.warn(logData, "[router] tools fallback failed due to payment required");
					} else {
						logger.error(logData, "[router] tools fallback failed");
					}
					throw statusCode ? new HTTPError(message, statusCode) : new Error(message);
				}
			}
		}

		// --- Normal path: ask Arch for a route, then try its candidates in order ---
		const routeSelection = await archSelectRoute(sanitizedMessages, undefined, params.locals);

		// If arch router failed with an error, only hard-fail for policy errors (400/401/402/403)
		// For transient errors (5xx, timeouts, network), allow fallback to continue
		if (routeSelection.routeName === ROUTER_FAILURE && routeSelection.error) {
			const { message, statusCode } = routeSelection.error;

			if (isPolicyError(statusCode)) {
				// Policy errors should be surfaced to the user immediately (e.g., subscription required)
				logger.error(
					{ err: message, ...(statusCode && { status: statusCode }) },
					"[router] arch router failed with policy error, propagating to client"
				);
				throw statusCode ? new HTTPError(message, statusCode) : new Error(message);
			}

			// Transient errors: log and continue to fallback
			logger.warn(
				{ err: message, ...(statusCode && { status: statusCode }) },
				"[router] arch router failed with transient error, attempting fallback"
			);
		}

		const fallbackModel = config.LLM_ROUTER_FALLBACK_MODEL || routerModel.id;
		const { candidates } = resolveRouteModels(routeSelection.routeName, routes, fallbackModel);

		// Remember the most recent failure so it can be forwarded if every candidate fails
		let lastErr: unknown = undefined;
		for (const candidate of candidates) {
			try {
				logger.info(
					{ route: routeSelection.routeName, model: candidate },
					"[router] trying candidate"
				);
				const ep = await createCandidateEndpoint(candidate);
				const gen = await ep({ ...params });
				return metadataThenStream(gen, candidate, routeSelection.routeName);
			} catch (e) {
				lastErr = e;
				const { message: errMsg, statusCode: errStatus } = extractUpstreamError(e);
				logger.warn(
					{
						route: routeSelection.routeName,
						model: candidate,
						err: errMsg,
						...(errStatus && { status: errStatus }),
					},
					"[router] candidate failed"
				);
				continue;
			}
		}

		// Exhausted all candidates — throw to signal upstream failure
		// Forward the upstream error to the client
		const { message, statusCode } = extractUpstreamError(lastErr);
		throw statusCode ? new HTTPError(message, statusCode) : new Error(message);
	};
}

================================================
FILE: src/lib/server/router/multimodal.ts
================================================
import { config } from "$lib/server/config";
import type { ProcessedModel } from "../models";

/**
 * Returns the configured multimodal model when it exists and is valid.
 * - Requires LLM_ROUTER_MULTIMODAL_MODEL to be set (id or name).
 * - Ignores router aliases and non-multimodal models.
*/
export function findConfiguredMultimodalModel(
	models: ProcessedModel[] | undefined
): ProcessedModel | undefined {
	const preferredModelId = (config.LLM_ROUTER_MULTIMODAL_MODEL || "").trim();
	if (!preferredModelId || !models?.length) return undefined;

	return models.find(
		(candidate) =>
			(candidate.id === preferredModelId || candidate.name === preferredModelId) &&
			!candidate.isRouter &&
			candidate.multimodal
	);
}

/**
 * Same lookup as findConfiguredMultimodalModel, but returns only the model's
 * id (falling back to its name), or undefined when no valid model is configured.
 */
export function getConfiguredMultimodalModelId(
	models: ProcessedModel[] | undefined
): string | undefined {
	const model = findConfiguredMultimodalModel(models);
	return model?.id ?? model?.name;
}

================================================
FILE: src/lib/server/router/policy.ts
================================================
import { readFile } from "node:fs/promises";
import { config } from "$lib/server/config";
import { ROUTER_FAILURE, type Route } from "./types";

let ROUTES: Route[] = [];
let loaded = false;

/**
 * Read and validate the routes policy file named by LLM_ROUTER_ROUTES_PATH,
 * store it in the module-level cache and return it.
 *
 * @throws If the path is not configured, the file cannot be read or parsed,
 *   the JSON is not an array, an entry lacks `name`, `description` or
 *   `primary_model`, or two entries share a name.
 */
export async function loadPolicy(): Promise<Route[]> {
	const path = config.LLM_ROUTER_ROUTES_PATH;
	if (!path) {
		// Fail with an actionable message instead of an opaque file-system error
		throw new Error("LLM_ROUTER_ROUTES_PATH is not set; cannot load router routes");
	}
	const text = await readFile(path, "utf8");
	const arr = JSON.parse(text) as Route[];
	if (!Array.isArray(arr)) {
		throw new Error("Routes config must be a flat array of routes");
	}
	const seen = new Set<string>();
	for (const r of arr) {
		if (!r?.name || !r?.description || !r?.primary_model) {
			throw new Error(`Invalid route entry: ${JSON.stringify(r)}`);
		}
		if (seen.has(r.name)) {
			throw new Error(`Duplicate route name: ${r.name}`);
		}
		seen.add(r.name);
	}
	ROUTES = arr;
	loaded = true;
	return ROUTES;
}

/**
 * Return the cached routes, loading the policy file first if it has not been
 * loaded successfully yet.
 */
export async function getRoutes(): Promise<Route[]> {
	if (!loaded) await loadPolicy();
	return ROUTES;
}

/**
 * Map a selected route name to the ordered list of model ids to try.
 *
 * - A router failure yields only `fallbackModel`.
 * - An unknown route name falls back to the "casual_conversation" route.
 * - If neither exists, only `fallbackModel` is returned.
 * - Otherwise: the route's primary model followed by its fallback models.
 */
export function resolveRouteModels(
	routeName: string,
	routes: Route[],
	fallbackModel: string
): { candidates: string[] } {
	if (routeName === ROUTER_FAILURE) {
		return { candidates: [fallbackModel] };
	}
	const sel =
		routes.find((r) => r.name === routeName) ||
		routes.find((r) => r.name === "casual_conversation");
	if (!sel) return { candidates: [fallbackModel] };
	const fallbacks = Array.isArray(sel.fallback_models) ? sel.fallback_models : [];
	return { candidates: [sel.primary_model, ...fallbacks] };
}

================================================
FILE: src/lib/server/router/toolsRoute.ts
================================================
import { config } from "$lib/server/config";
import { logger } from "$lib/server/logger";
import type { ProcessedModel } from "../models";

export const ROUTER_TOOLS_ROUTE = "agentic";

type LocalsWithMcp = App.Locals & {
	mcp?: {
		selectedServers?: unknown[];
		selectedServerNames?: unknown[];
	};
};

/** True when LLM_ROUTER_ENABLE_TOOLS is set to "true" (case-insensitive). */
export function isRouterToolsBypassEnabled(): boolean {
	return (config.LLM_ROUTER_ENABLE_TOOLS || "").toLowerCase() === "true";
}

/**
 * True when the request locals carry a non-empty `mcp.selectedServers` or
 * `mcp.selectedServerNames` array. Any error while inspecting locals counts as
 * "no tools selected".
 */
export function hasActiveToolsSelection(locals: App.Locals | undefined): boolean {
	try {
		const reqMcp = (locals as LocalsWithMcp | undefined)?.mcp;
		const byConfig =
			Array.isArray(reqMcp?.selectedServers) && (reqMcp?.selectedServers?.length ?? 0) > 0;
		const byName =
			Array.isArray(reqMcp?.selectedServerNames) && (reqMcp?.selectedServerNames?.length ?? 0) > 0;
		return Boolean(byConfig || byName);
	} catch {
		return false;
	}
}

/**
 * Find the model named by LLM_ROUTER_TOOLS_MODEL (matched against id or name).
 *
 * @returns The matching model, or undefined when the setting is empty, the
 *   model list is empty, or no model matches (the first and last cases are logged).
 */
export function pickToolsCapableModel(
	models: ProcessedModel[] | undefined
): ProcessedModel | undefined {
	const preferredRaw = (config as unknown as Record<string, string | undefined>)
		.LLM_ROUTER_TOOLS_MODEL;
	const preferred = preferredRaw?.trim();
	if (!preferred) {
		logger.warn("[router] tools bypass requested but LLM_ROUTER_TOOLS_MODEL is not set");
		return undefined;
	}
	if (!models?.length) return undefined;
	const found = models.find((m) => m.id === preferred || m.name === preferred);
	if (!found) {
		logger.warn(
			{ configuredModel: preferred },
			"[router] configured tools model not found; falling back to Arch routing"
		);
		return undefined;
	}
	logger.info({ model: found.id ?? found.name }, "[router] using configured tools model");
	return found;
}

================================================
FILE: src/lib/server/router/types.ts
================================================
/** One entry of the routes policy file. */
export interface Route {
	name: string;
	description: string;
	primary_model: string;
	fallback_models?: string[];
}

export interface RouteConfig {
	name: string;
	description: string;
}

/** Result of route selection; `error` is optional and may carry a status code. */
export interface RouteSelection {
	routeName: string;
	error?: {
		message: string;
		statusCode?: number;
	};
}

export const ROUTER_FAILURE = "arch_router_failure";

================================================
FILE: src/lib/server/sendSlack.ts
================================================
import { config } from "$lib/server/config";
import { logger } from "$lib/server/logger";

/**
 * POST `text` as JSON to the webhook configured in WEBHOOK_URL_REPORT_ASSISTANT.
 * Logs a warning and returns when the webhook is not configured; logs an error
 * (status text plus response body) when the webhook answers with a non-OK status.
 */
export async function sendSlack(text: string) {
	if (!config.WEBHOOK_URL_REPORT_ASSISTANT) {
		logger.warn("WEBHOOK_URL_REPORT_ASSISTANT is not set, tried to send a slack message.");
		return;
	}

	const res = await fetch(config.WEBHOOK_URL_REPORT_ASSISTANT, {
		method: "POST",
		headers: {
			"Content-type": "application/json",
		},
		body: JSON.stringify({
			text,
		}),
	});

	if (!res.ok) {
		// `res.text` is a method: it has to be called and awaited to get the body.
		// Reading the body is best-effort so that logging itself can never throw.
		const body = await res.text().catch(() => "");
		logger.error(`Webhook message failed. ${res.statusText} ${body}`);
	}
}

================================================
FILE: src/lib/server/textGeneration/generate.ts
================================================
import { config } from "$lib/server/config";
import {
	MessageReasoningUpdateType,
	MessageUpdateType,
	type MessageUpdate,
} from "$lib/types/MessageUpdate";
import { AbortedGenerations } from "../abortedGenerations";
import type { TextGenerationContext } from "./types";
import type { EndpointMessage } from "../endpoints/endpoints";
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
import { generateSummaryOfReasoning } from "./reasoning";
import { logger } from "../logger";

type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };

/**
 * Stream a model response as MessageUpdate events.
 *
 * Calls `endpoint` once and translates each stream output into updates:
 * router metadata, reasoning status/stream updates (when the model declares a
 * `reasoning` config of type "regex", "summarize" or "tokens"), plain stream
 * tokens, and one final answer. Generation stops early when the abort
 * controller is aborted or an abort newer than `promptedAt` is recorded for
 * the conversation.
 *
 * @param preprompt - Optional preprompt forwarded to the endpoint.
 */
export async function* generate(
	{
		model,
		endpoint,
		conv,
		messages,
		assistant,
		promptedAt,
		forceMultimodal,
		provider,
		locals,
		abortController,
	}: GenerateContext,
	preprompt?: string
): AsyncIterable<MessageUpdate> {
	// Reasoning mode support
	let reasoning = false;
	let reasoningBuffer = "";
	let lastReasoningUpdate = new Date();
	let status = "";
	const startTime = new Date();
	const modelReasoning = Reflect.get(model, "reasoning") as
		| { type: string; beginToken?: string; endToken?: string; regex?: string }
		| undefined;
	if (
		modelReasoning &&
		(modelReasoning.type === "regex" ||
			modelReasoning.type === "summarize" ||
			(modelReasoning.type === "tokens" && modelReasoning.beginToken === ""))
	) {
		// Starts in reasoning mode and we extract the answer from the reasoning
		reasoning = true;
		yield {
			type: MessageUpdateType.Reasoning,
			subtype: MessageReasoningUpdateType.Status,
			status: "Started reasoning...",
		};
	}

	const stream = await endpoint({
		messages,
		preprompt,
		generateSettings: assistant?.generateSettings,
		// Allow user-level override to force multimodal
		isMultimodal: (forceMultimodal ?? false) || model.multimodal,
		conversationId: conv._id,
		locals,
		abortSignal: abortController.signal,
		provider,
	});

	for await (const output of stream) {
		// Check if this output contains router metadata. Emit if either:
		// 1) route+model are present (router models), or
		// 2) provider-only is present (non-router models exposing x-inference-provider)
		if ("routerMetadata" in output && output.routerMetadata) {
			const hasRouteModel = Boolean(output.routerMetadata.route && output.routerMetadata.model);
			const hasProviderOnly = Boolean(output.routerMetadata.provider);
			if (hasRouteModel || hasProviderOnly) {
				yield {
					type: MessageUpdateType.RouterMetadata,
					route: output.routerMetadata.route || "",
					model: output.routerMetadata.model || "",
					provider:
						(output.routerMetadata
							.provider as unknown as import("@huggingface/inference").InferenceProvider) ||
						undefined,
				};
				continue;
			}
		}

		// text generation completed
		if (output.generated_text) {
			// If an abort happened just before final output, stop here and let
			// the caller emit an interrupted final answer with partial text.
			const abortTime = AbortedGenerations.getInstance().getAbortTime(conv._id.toString());
			if (abortController.signal.aborted || (abortTime && abortTime > promptedAt)) {
				if (!abortController.signal.aborted) {
					abortController.abort();
				}
				break;
			}

			// Treated as interrupted unless the last token is special or a configured stop token
			let interrupted =
				!output.token.special && !model.parameters.stop?.includes(output.token.text);

			let text = output.generated_text.trimEnd();
			for (const stopToken of model.parameters.stop ?? []) {
				if (!text.endsWith(stopToken)) continue;

				interrupted = false;
				text = text.slice(0, text.length - stopToken.length);
			}

			let finalAnswer = text;
			if (modelReasoning && modelReasoning.type === "regex" && modelReasoning.regex) {
				// First capture group of the configured regex is the answer; fall back to the full text
				const regex = new RegExp(modelReasoning.regex);
				finalAnswer = regex.exec(reasoningBuffer)?.[1] ?? text;
			} else if (modelReasoning && modelReasoning.type === "summarize") {
				yield {
					type: MessageUpdateType.Reasoning,
					subtype: MessageReasoningUpdateType.Status,
					status: "Summarizing reasoning...",
				};
				try {
					const summary = yield* generateFromDefaultEndpoint({
						messages: [
							{
								from: "user",
								content: `Question: ${messages[messages.length - 1].content}\n\nReasoning: ${reasoningBuffer}`,
							},
						],
						preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the reasoning steps explicitly include a code solution, make sure to include it in your answer.`,
						modelId: Reflect.get(model, "id") as string | undefined,
						locals,
					});
					finalAnswer = summary;
					yield {
						type: MessageUpdateType.Reasoning,
						subtype: MessageReasoningUpdateType.Status,
						status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
					};
				} catch (e) {
					finalAnswer = text;
					logger.error(e, "Error generating summary of reasoning");
				}
			} else if (modelReasoning && modelReasoning.type === "tokens") {
				// Remove the reasoning segment from final answer to avoid duplication
				const beginIndex = modelReasoning.beginToken
					? reasoningBuffer.indexOf(modelReasoning.beginToken)
					: 0;
				const endIndex = modelReasoning.endToken
					? reasoningBuffer.lastIndexOf(modelReasoning.endToken)
					: -1;

				if (beginIndex !== -1 && endIndex !== -1 && modelReasoning.endToken) {
					finalAnswer =
						text.slice(0, beginIndex) + text.slice(endIndex + modelReasoning.endToken.length);
				}
			}

			yield { type: MessageUpdateType.FinalAnswer, text: finalAnswer, interrupted };
			continue;
		}

		// Token-delimited reasoning: a token equal to the begin/end marker toggles reasoning mode
		if (modelReasoning && modelReasoning.type === "tokens") {
			if (output.token.text === modelReasoning.beginToken) {
				reasoning = true;
				reasoningBuffer += output.token.text;
				continue;
			} else if (modelReasoning.endToken && output.token.text === modelReasoning.endToken) {
				reasoning = false;
				reasoningBuffer += output.token.text;
				yield {
					type: MessageUpdateType.Reasoning,
					subtype: MessageReasoningUpdateType.Status,
					status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
				};
				continue;
			}
		}

		// ignore special tokens
		if (output.token.special) continue;

		// pass down normal token
		if (reasoning) {
			reasoningBuffer += output.token.text;

			if (modelReasoning && modelReasoning.type === "tokens" && modelReasoning.endToken) {
				// The end marker may arrive inside a larger token: split the buffer around it
				if (reasoningBuffer.lastIndexOf(modelReasoning.endToken) !== -1) {
					const endTokenIndex = reasoningBuffer.lastIndexOf(modelReasoning.endToken);
					const textBuffer = reasoningBuffer.slice(endTokenIndex + modelReasoning.endToken.length);
					reasoningBuffer = reasoningBuffer.slice(
						0,
						endTokenIndex + modelReasoning.endToken.length + 1
					);

					yield {
						type: MessageUpdateType.Reasoning,
						subtype: MessageReasoningUpdateType.Stream,
						token: output.token.text,
					};
					yield { type: MessageUpdateType.Stream, token: textBuffer };
					yield {
						type: MessageUpdateType.Reasoning,
						subtype: MessageReasoningUpdateType.Status,
						status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
					};
					reasoning = false;
					continue;
				}
			}

			// yield status update if it has changed
			if (status !== "") {
				yield {
					type: MessageUpdateType.Reasoning,
					subtype: MessageReasoningUpdateType.Status,
					status,
				};
				status = "";
			}

			// create a new status every ~4s (optional)
			if (
				Reflect.get(config, "REASONING_SUMMARY") === "true" &&
				new Date().getTime() - lastReasoningUpdate.getTime() > 4000
			) {
				lastReasoningUpdate = new Date();
				try {
					// Fire-and-forget on purpose: the summary is picked up by a later iteration.
					// The rejection handler is required because the surrounding try/catch only
					// sees synchronous throws, not a rejected promise.
					generateSummaryOfReasoning(reasoningBuffer, model.id, locals)
						.then((summary) => {
							status = summary;
						})
						.catch((e) => {
							logger.error(e, "Error generating summary of reasoning");
						});
				} catch (e) {
					logger.error(e, "Error generating summary of reasoning");
				}
			}
			yield {
				type: MessageUpdateType.Reasoning,
				subtype: MessageReasoningUpdateType.Stream,
				token: output.token.text,
			};
		} else {
			yield { type: MessageUpdateType.Stream, token: output.token.text };
		}

		// abort check
		const date = AbortedGenerations.getInstance().getAbortTime(conv._id.toString());

		if (date && date > promptedAt) {
			logger.info(`Aborting generation for conversation ${conv._id}`);
			if (!abortController.signal.aborted) {
				abortController.abort();
			}
			break;
		}

		// no output check
		if (!output) break;
	}
}

================================================
FILE: src/lib/server/textGeneration/index.ts
================================================
import { preprocessMessages } from "../endpoints/preprocessMessages";
import { generateTitleForConversation } from "./title";
import {
	type MessageUpdate,
	MessageUpdateType,
	MessageUpdateStatus,
} from "$lib/types/MessageUpdate";
import { generate } from "./generate";
import { runMcpFlow } from "./mcp/runMcpFlow";
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
import type { TextGenerationContext } from "./types";

/**
 * Emit a KeepAlive status update, then wait 100 ms, repeating until `done`
 * is aborted.
 */
async function* keepAlive(done: AbortSignal): AsyncGenerator<MessageUpdate> {
	while (!done.aborted) {
		yield {
			type: MessageUpdateType.Status,
			status: MessageUpdateStatus.KeepAlive,
		};
		await new Promise((resolve) => setTimeout(resolve, 100));
	}
}

export async function* textGeneration(ctx: TextGenerationContext) {
	const done = new AbortController();

	const titleGen = generateTitleForConversation(ctx.conv, ctx.locals);
	const textGen = textGenerationWithoutTitle(ctx, done);
	const keepAliveGen = keepAlive(done.signal);

	// keep alive until textGen is done
	yield*
mergeAsyncGenerators([titleGen, textGen, keepAliveGen]);
}

/**
 * Run one generation: emit a Started status, try the MCP tool flow, and fall
 * back to plain generation when MCP reports "not_applicable" or fails with a
 * non-abort error.
 *
 * The fallback runs outside the MCP try block so that an error thrown while
 * the fallback is already streaming is not answered with a second, duplicate
 * generation. Abort errors are swallowed (the user stopped on purpose); any
 * other fallback error propagates. `done` is always aborted on exit so the
 * keep-alive loop stops even when an error propagates.
 */
async function* textGenerationWithoutTitle(
	ctx: TextGenerationContext,
	done: AbortController
): AsyncGenerator<MessageUpdate> {
	try {
		yield {
			type: MessageUpdateType.Status,
			status: MessageUpdateStatus.Started,
		};

		const { conv, messages } = ctx;
		const convId = conv._id;

		const preprompt = conv.preprompt;

		const processedMessages = await preprocessMessages(messages, convId);

		// Abort detection shared by the MCP flow and the fallback generation
		const isAbortError = (err: unknown): boolean =>
			ctx.abortController.signal.aborted ||
			(err instanceof Error &&
				(err.name === "AbortError" ||
					err.name === "APIUserAbortError" ||
					err.message.includes("Request was aborted")));

		// Try MCP tool flow first; fall back to default generation if not selected/available
		let useDefaultGeneration = false;
		try {
			const mcpGen = runMcpFlow({
				model: ctx.model,
				conv,
				messages: processedMessages,
				assistant: ctx.assistant,
				forceMultimodal: ctx.forceMultimodal,
				forceTools: ctx.forceTools,
				provider: ctx.provider,
				locals: ctx.locals,
				preprompt,
				abortSignal: ctx.abortController.signal,
				abortController: ctx.abortController,
				promptedAt: ctx.promptedAt,
			});

			// Manual iteration (instead of yield*) to read the generator's return value
			let step = await mcpGen.next();
			while (!step.done) {
				yield step.value;
				step = await mcpGen.next();
			}
			// If the result is "completed" or "aborted", don't fall back
			useDefaultGeneration = step.value === "not_applicable";
		} catch (err) {
			// Don't fall back on abort errors - user intentionally stopped.
			// On non-abort MCP error, fall back to normal generation.
			useDefaultGeneration = !isAbortError(err);
		}

		if (useDefaultGeneration) {
			try {
				yield* generate({ ...ctx, messages: processedMessages }, preprompt);
			} catch (err) {
				if (!isAbortError(err)) throw err;
			}
		}
	} finally {
		done.abort();
	}
}

================================================
FILE: src/lib/server/textGeneration/mcp/fileRefs.ts
================================================
import type { EndpointMessage } from "../../endpoints/endpoints";

export type FileRefPayload = {
	name: string;
	mime: string;
	base64: string;
};

export type RefKind = {
	prefix: string;
	matches: (mime: string) => boolean;
	toDataUrl?: (payload: FileRefPayload) => string;
};

export type ResolvedFileRef = FileRefPayload & { refKind: RefKind };
export type FileRefResolver = (ref: string) => ResolvedFileRef | undefined;

const IMAGE_REF_KIND: RefKind = {
	prefix: "image",
	matches: (mime) => typeof mime === "string" && mime.startsWith("image/"),
	toDataUrl: (payload) => `data:${payload.mime};base64,${payload.base64}`,
};

const DEFAULT_REF_KINDS: RefKind[] = [IMAGE_REF_KIND];

/**
 * Build a resolver that maps short ref strings (e.g. "image_1", "image_2") to the
 * corresponding file payload across the whole conversation in chronological
 * order of user uploads. (image_1 = first user-uploaded image, image_2 = second, etc.)
 * Currently only images are exposed to end users, but the plumbing supports
 * additional kinds later.
 *
 * @returns A resolver, or undefined when `refKinds` is empty or no user file
 *   matches any kind.
 */
export function buildFileRefResolver(
	messages: EndpointMessage[],
	refKinds: RefKind[] = DEFAULT_REF_KINDS
): FileRefResolver | undefined {
	if (!Array.isArray(refKinds) || refKinds.length === 0) return undefined;

	// Bucket matched files by ref kind preserving conversation order (oldest -> newest)
	const buckets = new Map<RefKind, FileRefPayload[]>();
	for (const msg of messages) {
		if (msg.from !== "user") continue;
		for (const file of msg.files ?? []) {
			const mime = file?.mime ?? "";
			// A file is filed under the first kind whose matcher accepts its mime type
			const kind = refKinds.find((k) => k.matches(mime));
			if (!kind) continue;
			const payload: FileRefPayload = { name: file.name, mime, base64: file.value };
			const arr = buckets.get(kind) ?? [];
			arr.push(payload);
			buckets.set(kind, arr);
		}
	}

	if (buckets.size === 0) return undefined;

	const resolver: FileRefResolver = (ref) => {
		if (!ref || typeof ref !== "string") return undefined;
		const trimmed = ref.trim().toLowerCase();
		for (const kind of refKinds) {
			const match = new RegExp(`^${kind.prefix}_(\\d+)$`).exec(trimmed);
			if (!match) continue;
			// Refs are 1-based ("image_1" is the first file); convert to an array index
			const idx = Number(match[1]) - 1;
			const files = buckets.get(kind) ?? [];
			if (Number.isFinite(idx) && idx >= 0 && idx < files.length) {
				const payload = files[idx];
				return payload ? { ...payload, refKind: kind } : undefined;
			}
		}
		return undefined;
	};
	return resolver;
}

/** Convenience wrapper: a file-ref resolver restricted to image refs. */
export function buildImageRefResolver(messages: EndpointMessage[]): FileRefResolver | undefined {
	return buildFileRefResolver(messages, [IMAGE_REF_KIND]);
}

type FieldRule = {
	keys: string[];
	action: "attachPayload" | "replaceWithDataUrl";
	attachKey?: string;
	allowedPrefixes?: string[]; // limit to specific ref kinds (e.g. ["image"])
};

const DEFAULT_FIELD_RULES: FieldRule[] = [
	{
		keys: ["image_ref"],
		action: "attachPayload",
		attachKey: "image",
		allowedPrefixes: ["image"],
	},
	{
		keys: ["input_image", "image", "image_url"],
		action: "replaceWithDataUrl",
		allowedPrefixes: ["image"],
	},
];

/**
 * Walk tool args and hydrate known ref fields while keeping logging lightweight.
 * Only image refs are recognized for now to preserve current behavior.
 *
 * Mutates `argsObj` in place. For each string property whose value resolves to
 * a file and whose key matches a rule:
 *  - "attachPayload" adds `{ name, mime, base64 }` under `attachKey` (default
 *    "file") unless a non-array object is already stored there;
 *  - "replaceWithDataUrl" overwrites the property with a data URL.
 */
export function attachFileRefsToArgs(
	argsObj: Record<string, unknown>,
	resolveRef?: FileRefResolver,
	fieldRules: FieldRule[] = DEFAULT_FIELD_RULES
): void {
	if (!resolveRef) return;

	const visit = (node: unknown): void => {
		if (!node || typeof node !== "object") return;
		if (Array.isArray(node)) {
			for (const v of node) visit(v);
			return;
		}

		const obj = node as Record<string, unknown>;
		for (const [key, value] of Object.entries(obj)) {
			if (typeof value !== "string") {
				// Recurse into nested objects and arrays; other non-strings are ignored
				if (value && typeof value === "object") visit(value);
				continue;
			}

			const resolved = resolveRef(value);
			if (!resolved) continue;

			const rule = fieldRules.find((r) => r.keys.includes(key));
			if (!rule) continue;
			if (rule.allowedPrefixes && !rule.allowedPrefixes.includes(resolved.refKind.prefix)) continue;

			if (rule.action === "attachPayload") {
				const targetKey = rule.attachKey ?? "file";
				// Never overwrite an object the caller already supplied under the target key
				if (
					typeof obj[targetKey] !== "object" ||
					obj[targetKey] === null ||
					Array.isArray(obj[targetKey])
				) {
					obj[targetKey] = {
						name: resolved.name,
						mime: resolved.mime,
						base64: resolved.base64,
					};
				}
			} else if (rule.action === "replaceWithDataUrl") {
				const toUrl =
					resolved.refKind.toDataUrl ??
					((p: FileRefPayload) => `data:${p.mime};base64,${p.base64}`);
				obj[key] = toUrl(resolved);
			}
		}
	};

	visit(argsObj);
}

================================================
FILE: src/lib/server/textGeneration/mcp/routerResolution.ts
================================================
import { config } from "$lib/server/config";
import { archSelectRoute } from "$lib/server/router/arch";
import { getRoutes, resolveRouteModels } from "$lib/server/router/policy";
import {
	hasActiveToolsSelection,
	isRouterToolsBypassEnabled,
	pickToolsCapableModel,
	ROUTER_TOOLS_ROUTE,
} from "$lib/server/router/toolsRoute";
import { findConfiguredMultimodalModel } from "$lib/server/router/multimodal";
import type { EndpointMessage } from "../../endpoints/endpoints";
import { stripReasoningFromMessageForRouting } from "../utils/routing";
import type { ProcessedModel } from "../../models";
import { logger } from "../../logger";

export interface RouterResolutionInput {
	model: ProcessedModel;
	messages: EndpointMessage[];
	conversationId: string;
	hasImageInput: boolean;
	locals: App.Locals | undefined;
}

export interface RouterResolutionResult {
	runMcp: boolean;
	targetModel: ProcessedModel;
	candidateModelId?: string;
	resolvedRoute?: string;
}

/**
 * Decide which concrete model the MCP flow should use and whether it should run at all.
 *
 * - Non-router models are returned unchanged with `runMcp: true`.
 * - With image input, the configured multimodal model is used; MCP is skipped
 *   when none is configured.
 * - With the tools bypass enabled and tools active, the configured tools model
 *   is used when found.
 * - Otherwise Arch selects a route and its primary candidate is used; MCP is
 *   skipped when that candidate is missing, equals the fallback model, or is
 *   not in the model list.
 * - Any error during resolution is logged and disables MCP.
 */
export async function resolveRouterTarget({
	model,
	messages,
	conversationId,
	hasImageInput,
	locals,
}: RouterResolutionInput): Promise<RouterResolutionResult> {
	let targetModel = model;
	let candidateModelId: string | undefined;
	let resolvedRoute: string | undefined;
	let runMcp = true;

	if (!model.isRouter) {
		return { runMcp, targetModel };
	}

	try {
		const mod = await import("../../models");
		const allModels = mod.models as ProcessedModel[];

		if (hasImageInput) {
			const multimodalCandidate = findConfiguredMultimodalModel(allModels);
			if (!multimodalCandidate) {
				runMcp = false;
				logger.warn(
					{ configuredModel: config.LLM_ROUTER_MULTIMODAL_MODEL },
					"[mcp] multimodal input but configured model missing or invalid; skipping MCP route"
				);
			} else {
				targetModel = multimodalCandidate;
				candidateModelId = multimodalCandidate.id ?? multimodalCandidate.name;
				resolvedRoute = "multimodal";
			}
		} else {
			// If tools are enabled and at least one MCP server is active, prefer a tools-capable model
			const toolsEnabled = isRouterToolsBypassEnabled();
			const hasToolsActive = hasActiveToolsSelection(locals);
			if (toolsEnabled && hasToolsActive) {
				const found = pickToolsCapableModel(allModels);
				if (found) {
					targetModel = found;
					candidateModelId = found.id ?? found.name;
					resolvedRoute = ROUTER_TOOLS_ROUTE;
					// Continue; runMcp remains true
					return { runMcp, targetModel, candidateModelId, resolvedRoute };
				}
				// No tools-capable model found; fall back to normal Arch routing below
			}
			const routes = await getRoutes();
			const sanitized = messages.map(stripReasoningFromMessageForRouting);
			const { routeName } = await archSelectRoute(sanitized, conversationId, locals);
			resolvedRoute = routeName;
			const fallbackModel = config.LLM_ROUTER_FALLBACK_MODEL || model.id;
			const { candidates } = resolveRouteModels(routeName, routes, fallbackModel);
			const primaryCandidateId = candidates[0];
			if (!primaryCandidateId || primaryCandidateId === fallbackModel) {
				runMcp = false;
			} else {
				const found = allModels?.find(
					(candidate) =>
						candidate.id === primaryCandidateId || candidate.name === primaryCandidateId
				);
				if (found) {
					targetModel = found;
					candidateModelId = primaryCandidateId;
				} else {
					runMcp = false;
				}
			}
		}
	} catch (error) {
		logger.warn({ err: String(error) }, "[mcp] routing preflight failed");
		runMcp = false;
	}

	return { runMcp, targetModel, candidateModelId, resolvedRoute };
}

================================================
FILE: src/lib/server/textGeneration/mcp/runMcpFlow.ts
================================================
import { config } from "$lib/server/config";
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import { getMcpServers } from "$lib/server/mcp/registry";
import { isValidUrl } from "$lib/server/urlSafety";
import { resetMcpToolsCache } from
"$lib/server/mcp/tools"; import { getOpenAiToolsForMcp } from "$lib/server/mcp/tools"; import type { ChatCompletionChunk, ChatCompletionCreateParamsStreaming, ChatCompletionMessageParam, ChatCompletionMessageToolCall, } from "openai/resources/chat/completions"; import type { Stream } from "openai/streaming"; import { buildToolPreprompt } from "../utils/toolPrompt"; import type { EndpointMessage } from "../../endpoints/endpoints"; import { resolveRouterTarget } from "./routerResolution"; import { executeToolCalls, type NormalizedToolCall } from "./toolInvocation"; import { drainPool } from "$lib/server/mcp/clientPool"; import type { TextGenerationContext } from "../types"; import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken, isExaMcpServer, } from "$lib/server/mcp/hf"; import { buildImageRefResolver } from "./fileRefs"; import { prepareMessagesWithFiles } from "$lib/server/textGeneration/utils/prepareFiles"; import { makeImageProcessor } from "$lib/server/endpoints/images"; import { logger } from "$lib/server/logger"; import { AbortedGenerations } from "$lib/server/abortedGenerations"; export type RunMcpFlowContext = Pick< TextGenerationContext, "model" | "conv" | "assistant" | "forceMultimodal" | "forceTools" | "provider" | "locals" > & { messages: EndpointMessage[] }; // Return type: "completed" = MCP ran successfully, "not_applicable" = MCP didn't run, "aborted" = user aborted export type McpFlowResult = "completed" | "not_applicable" | "aborted"; export async function* runMcpFlow({ model, conv, messages, assistant, forceMultimodal, forceTools, provider, locals, preprompt, abortSignal, abortController, promptedAt, }: RunMcpFlowContext & { preprompt?: string; abortSignal?: AbortSignal; abortController?: AbortController; promptedAt?: Date; }): AsyncGenerator { // Helper to check if generation should be aborted via DB polling // Also triggers the abort controller to cancel active streams/requests const checkAborted = (): boolean => { if 
(abortSignal?.aborted) return true; const abortTime = AbortedGenerations.getInstance().getAbortTime(conv._id.toString()); if (abortTime && promptedAt && abortTime > promptedAt) { // Trigger the abort controller to cancel active streams if (abortController && !abortController.signal.aborted) { abortController.abort(); } return true; } return false; }; // Start from env-configured servers let servers = getMcpServers(); try { logger.debug( { baseServers: servers.map((s) => ({ name: s.name, url: s.url })), count: servers.length }, "[mcp] base servers loaded" ); } catch {} // Merge in request-provided custom servers (if any) try { const reqMcp = ( locals as unknown as { mcp?: { selectedServers?: Array<{ name: string; url: string; headers?: Record }>; selectedServerNames?: string[]; }; } )?.mcp; const custom = Array.isArray(reqMcp?.selectedServers) ? reqMcp?.selectedServers : []; if (custom.length > 0) { // Invalidate cached tool list when the set of servers changes at request-time resetMcpToolsCache(); // Deduplicate by server name (request takes precedence) const byName = new Map< string, { name: string; url: string; headers?: Record } >(); for (const s of servers) byName.set(s.name, s); for (const s of custom) byName.set(s.name, s); servers = [...byName.values()]; try { logger.debug( { customProvidedCount: custom.length, mergedServers: servers.map((s) => ({ name: s.name, url: s.url, hasAuth: !!s.headers?.Authorization, })), }, "[mcp] merged request-provided servers" ); } catch {} } // If the client specified a selection by name, filter to those const names = Array.isArray(reqMcp?.selectedServerNames) ? 
reqMcp?.selectedServerNames : undefined; if (Array.isArray(names)) { const before = servers.map((s) => s.name); servers = servers.filter((s) => names.includes(s.name)); try { logger.debug( { selectedNames: names, before, after: servers.map((s) => s.name) }, "[mcp] applied name selection" ); } catch {} } } catch { // ignore selection merge errors and proceed with env servers } // If selection/merge yielded no servers, bail early with clearer log if (servers.length === 0) { logger.warn({}, "[mcp] no MCP servers selected after merge/name filter"); return "not_applicable"; } // Enforce server-side safety (public HTTPS only, no private ranges) { const before = servers.slice(); servers = servers.filter((s) => { try { return isValidUrl(s.url); } catch { return false; } }); try { const rejected = before.filter((b) => !servers.includes(b)); if (rejected.length > 0) { logger.warn( { rejected: rejected.map((r) => ({ name: r.name, url: r.url })) }, "[mcp] rejected servers by URL safety" ); } } catch {} } if (servers.length === 0) { logger.warn({}, "[mcp] all selected MCP servers rejected by URL safety guard"); return "not_applicable"; } // Optionally attach the logged-in user's HF token to the official HF MCP server only. // Never override an explicit Authorization header, and require token to look like an HF token. try { const shouldForward = config.MCP_FORWARD_HF_USER_TOKEN === "true"; const userToken = (locals as unknown as { hfAccessToken?: string } | undefined)?.hfAccessToken ?? (locals as unknown as { token?: string } | undefined)?.token; if (shouldForward && hasNonEmptyToken(userToken)) { const overlayApplied: string[] = []; servers = servers.map((s) => { try { if (isStrictHfMcpLogin(s.url) && !hasAuthHeader(s.headers)) { overlayApplied.push(s.name); return { ...s, headers: { ...(s.headers ?? 
{}), Authorization: `Bearer ${userToken}` }, }; } } catch { // ignore URL parse errors and leave server unchanged } return s; }); if (overlayApplied.length > 0) { try { logger.debug({ overlayApplied }, "[mcp] forwarded HF token to servers"); } catch {} } } } catch { // best-effort overlay; continue if anything goes wrong } // Inject Exa API key for mcp.exa.ai servers via URL param (mcp.exa.ai doesn't support headers) try { const exaApiKey = config.EXA_API_KEY; if (hasNonEmptyToken(exaApiKey)) { const overlayApplied: string[] = []; servers = servers.map((s) => { try { if (isExaMcpServer(s.url)) { const url = new URL(s.url); if (!url.searchParams.has("exaApiKey")) { url.searchParams.set("exaApiKey", exaApiKey); overlayApplied.push(s.name); return { ...s, url: url.toString() }; } } } catch {} return s; }); if (overlayApplied.length > 0) { logger.debug({ overlayApplied }, "[mcp] injected Exa API key to servers"); } } } catch { // best-effort injection; continue if anything goes wrong } logger.debug( { count: servers.length, servers: servers.map((s) => s.name) }, "[mcp] servers configured" ); if (servers.length === 0) { return "not_applicable"; } // Gate MCP flow based on model tool support (aggregated) with user override try { const supportsTools = Boolean((model as unknown as { supportsTools?: boolean }).supportsTools); const toolsEnabled = Boolean(forceTools) || supportsTools; logger.debug( { model: model.id ?? model.name, supportsTools, forceTools: Boolean(forceTools), toolsEnabled, }, "[mcp] tools gate evaluation" ); if (!toolsEnabled) { logger.info( { model: model.id ?? 
model.name }, "[mcp] tools disabled for model; skipping MCP flow" ); return "not_applicable"; } } catch { // If anything goes wrong reading the flag, proceed (previous behavior) } const resolveFileRef = buildImageRefResolver(messages); const imageProcessor = makeImageProcessor({ supportedMimeTypes: ["image/png", "image/jpeg"], preferredMimeType: "image/jpeg", maxSizeInMB: 1, maxWidth: 1024, maxHeight: 1024, }); const hasImageInput = messages.some((msg) => (msg.files ?? []).some( (file) => typeof file?.mime === "string" && file.mime.startsWith("image/") ) ); const { runMcp, targetModel, candidateModelId, resolvedRoute } = await resolveRouterTarget({ model, messages, conversationId: conv._id.toString(), hasImageInput, locals, }); if (!runMcp) { logger.info( { model: targetModel.id ?? targetModel.name, resolvedRoute }, "[mcp] runMcp=false (routing chose non-tools candidate)" ); return "not_applicable"; } try { const { tools: oaTools, mapping } = await getOpenAiToolsForMcp(servers, { signal: abortSignal, }); try { logger.info( { toolCount: oaTools.length, toolNames: oaTools.map((t) => t.function.name) }, "[mcp] openai tool defs built" ); } catch {} if (oaTools.length === 0) { logger.warn({}, "[mcp] zero tools available after listing; skipping MCP flow"); return "not_applicable"; } const { OpenAI } = await import("openai"); // Capture provider header (x-inference-provider) from the upstream OpenAI-compatible server. 
let providerHeader: string | undefined; const captureProviderFetch = async ( input: RequestInfo | URL, init?: RequestInit ): Promise => { const res = await fetch(input, init); const p = res.headers.get("x-inference-provider"); if (p && !providerHeader) providerHeader = p; return res; }; const openai = new OpenAI({ apiKey: config.OPENAI_API_KEY || config.HF_TOKEN || "sk-", baseURL: config.OPENAI_BASE_URL, fetch: captureProviderFetch, defaultHeaders: { // Bill to organization if configured (HuggingChat only) ...(config.isHuggingChat && locals?.billingOrganization ? { "X-HF-Bill-To": locals.billingOrganization } : {}), }, }); const mmEnabled = (forceMultimodal ?? false) || targetModel.multimodal; logger.info( { targetModel: targetModel.id ?? targetModel.name, mmEnabled, route: resolvedRoute, candidateModelId, toolCount: oaTools.length, hasUserToken: Boolean((locals as unknown as { token?: string })?.token), }, "[mcp] starting completion with tools" ); let messagesOpenAI: ChatCompletionMessageParam[] = await prepareMessagesWithFiles( messages, imageProcessor, mmEnabled ); const toolPreprompt = buildToolPreprompt(oaTools); const prepromptPieces: string[] = []; if (toolPreprompt.trim().length > 0) { prepromptPieces.push(toolPreprompt); } if (typeof preprompt === "string" && preprompt.trim().length > 0) { prepromptPieces.push(preprompt); } const mergedPreprompt = prepromptPieces.join("\n\n"); const hasSystemMessage = messagesOpenAI.length > 0 && messagesOpenAI[0]?.role === "system"; if (hasSystemMessage) { if (mergedPreprompt.length > 0) { const existing = messagesOpenAI[0].content ?? ""; const existingText = typeof existing === "string" ? existing : ""; messagesOpenAI[0].content = mergedPreprompt + (existingText ? 
"\n\n" + existingText : ""); } } else if (mergedPreprompt.length > 0) { messagesOpenAI = [{ role: "system", content: mergedPreprompt }, ...messagesOpenAI]; } // Work around servers that reject `system` role if ( typeof config.OPENAI_BASE_URL === "string" && config.OPENAI_BASE_URL.length > 0 && (config.OPENAI_BASE_URL.includes("hf.space") || config.OPENAI_BASE_URL.includes("gradio.app")) && messagesOpenAI[0]?.role === "system" ) { messagesOpenAI[0] = { ...messagesOpenAI[0], role: "user" }; } const parameters = { ...targetModel.parameters, ...assistant?.generateSettings } as Record< string, unknown >; const maxTokens = (parameters?.max_tokens as number | undefined) ?? (parameters?.max_new_tokens as number | undefined) ?? (parameters?.max_completion_tokens as number | undefined); const stopSequences = typeof parameters?.stop === "string" ? parameters.stop : Array.isArray(parameters?.stop) ? (parameters.stop as string[]) : undefined; // Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together") const baseModelId = targetModel.id ?? targetModel.name; const modelIdWithProvider = provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId; const completionBase: Omit = { model: modelIdWithProvider, stream: true, temperature: typeof parameters?.temperature === "number" ? parameters.temperature : undefined, top_p: typeof parameters?.top_p === "number" ? parameters.top_p : undefined, frequency_penalty: typeof parameters?.frequency_penalty === "number" ? parameters.frequency_penalty : typeof parameters?.repetition_penalty === "number" ? parameters.repetition_penalty : undefined, presence_penalty: typeof parameters?.presence_penalty === "number" ? parameters.presence_penalty : undefined, stop: stopSequences, max_tokens: typeof maxTokens === "number" ? 
maxTokens : undefined, tools: oaTools, tool_choice: "auto", }; const toPrimitive = (value: unknown) => { if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") { return value; } return undefined; }; const parseArgs = (raw: unknown): Record => { if (typeof raw !== "string" || raw.trim().length === 0) return {}; try { return JSON.parse(raw); } catch { return {}; } }; const processToolOutput = ( text: string ): { annotated: string; sources: { index: number; link: string }[]; } => ({ annotated: text, sources: [] }); let lastAssistantContent = ""; let streamedContent = false; // Track whether we're inside a block when the upstream streams // provider-specific reasoning tokens (e.g. `reasoning` or `reasoning_content`). let thinkOpen = false; if (resolvedRoute && candidateModelId) { yield { type: MessageUpdateType.RouterMetadata, route: resolvedRoute, model: candidateModelId, }; logger.debug( { route: resolvedRoute, model: candidateModelId }, "[mcp] router metadata emitted" ); } for (let loop = 0; loop < 10; loop += 1) { // Check for abort at the start of each loop iteration if (checkAborted()) { logger.info({ loop }, "[mcp] aborting at start of loop iteration"); return "aborted"; } lastAssistantContent = ""; streamedContent = false; const completionRequest: ChatCompletionCreateParamsStreaming = { ...completionBase, messages: messagesOpenAI, }; const completionStream: Stream = await openai.chat.completions.create( completionRequest, { signal: abortSignal, headers: { "ChatUI-Conversation-ID": conv._id.toString(), "X-use-cache": "false", ...(locals?.token ? { Authorization: `Bearer ${locals.token}` } : {}), }, } ); // If provider header was exposed, notify UI so it can render "via {provider}". 
if (providerHeader) { yield { type: MessageUpdateType.RouterMetadata, route: "", model: "", provider: providerHeader as unknown as import("@huggingface/inference").InferenceProvider, }; logger.debug({ provider: providerHeader }, "[mcp] provider metadata emitted"); } const toolCallState: Record = {}; let firstToolDeltaLogged = false; let sawToolCall = false; let tokenCount = 0; for await (const chunk of completionStream) { const choice = chunk.choices?.[0]; const delta = choice?.delta; if (!delta) continue; const chunkToolCalls = delta.tool_calls ?? []; if (chunkToolCalls.length > 0) { sawToolCall = true; for (const call of chunkToolCalls) { const toolCall = call as unknown as { index?: number; id?: string; function?: { name?: string; arguments?: string }; }; const index = toolCall.index ?? 0; const current = toolCallState[index] ?? { arguments: "" }; if (toolCall.id) current.id = toolCall.id; if (toolCall.function?.name) current.name = toolCall.function.name; if (toolCall.function?.arguments) current.arguments += toolCall.function.arguments; toolCallState[index] = current; } if (!firstToolDeltaLogged) { try { const first = toolCallState[ Object.keys(toolCallState) .map((k) => Number(k)) .sort((a, b) => a - b)[0] ?? 0 ]; logger.info( { firstCallName: first?.name, hasId: Boolean(first?.id) }, "[mcp] observed streamed tool_call delta" ); firstToolDeltaLogged = true; } catch {} } } const deltaContent = (() => { if (typeof delta.content === "string") return delta.content; const maybeParts = delta.content as unknown; if (Array.isArray(maybeParts)) { return maybeParts .map((part) => typeof part === "object" && part !== null && "text" in part && typeof (part as Record).text === "string" ? String((part as Record).text) : "" ) .join(""); } return ""; })(); // Provider-dependent reasoning fields (e.g., `reasoning` or `reasoning_content`). const deltaReasoning: string = typeof (delta as unknown as Record)?.reasoning === "string" ? 
((delta as unknown as { reasoning?: string }).reasoning as string) : typeof (delta as unknown as Record)?.reasoning_content === "string" ? ((delta as unknown as { reasoning_content?: string }).reasoning_content as string) : ""; // Merge reasoning + content into a single combined token stream, mirroring // the OpenAI adapter so the UI can auto-detect blocks. let combined = ""; if (deltaReasoning.trim().length > 0) { if (!thinkOpen) { combined += "" + deltaReasoning; thinkOpen = true; } else { combined += deltaReasoning; } } if (deltaContent && deltaContent.length > 0) { if (thinkOpen) { combined += "" + deltaContent; thinkOpen = false; } else { combined += deltaContent; } } if (combined.length > 0) { lastAssistantContent += combined; if (!sawToolCall) { streamedContent = true; yield { type: MessageUpdateType.Stream, token: combined }; tokenCount += combined.length; } } // Periodic abort check during streaming if (checkAborted()) { logger.info({ loop, tokenCount }, "[mcp] aborting during stream"); return "aborted"; } } logger.info( { sawToolCalls: Object.keys(toolCallState).length > 0, tokens: tokenCount, loop }, "[mcp] completion stream closed" ); // Check abort after stream completes if (checkAborted()) { logger.info({ loop }, "[mcp] aborting after stream completed"); return "aborted"; } // Auto-close any unclosed block so reasoning from this loop // doesn't swallow content from subsequent iterations. The client-side // regex matches `` to end-of-string, so an unclosed block would // hide everything that follows. 
if (thinkOpen) { if (streamedContent) { yield { type: MessageUpdateType.Stream, token: "" }; } lastAssistantContent += ""; thinkOpen = false; } if (Object.keys(toolCallState).length > 0) { // If any streamed call is missing id, perform a quick non-stream retry to recover full tool_calls with ids const missingId = Object.values(toolCallState).some((c) => c?.name && !c?.id); let calls: NormalizedToolCall[]; if (missingId) { logger.debug( { loop }, "[mcp] missing tool_call id in stream; retrying non-stream to recover ids" ); const nonStream = await openai.chat.completions.create( { ...completionBase, messages: messagesOpenAI, stream: false }, { signal: abortSignal, headers: { "ChatUI-Conversation-ID": conv._id.toString(), "X-use-cache": "false", ...(locals?.token ? { Authorization: `Bearer ${locals.token}` } : {}), }, } ); const tc = nonStream.choices?.[0]?.message?.tool_calls ?? []; calls = tc.map((t) => ({ id: t.id, name: t.function?.name ?? "", arguments: t.function?.arguments ?? "", })); } else { calls = Object.values(toolCallState) .map((c) => (c?.id && c?.name ? c : undefined)) .filter(Boolean) .map((c) => ({ id: c?.id ?? "", name: c?.name ?? "", arguments: c?.arguments ?? "", })) as NormalizedToolCall[]; } // Include the assistant message with tool_calls so the next round // sees both the calls and their outputs, matching MCP branch behavior. const toolCalls: ChatCompletionMessageToolCall[] = calls.map((call) => ({ id: call.id, type: "function", function: { name: call.name, arguments: call.arguments }, })); // Avoid sending content back to the model alongside tool_calls // to prevent confusing follow-up reasoning. Strip any think blocks. 
const assistantContentForToolMsg = lastAssistantContent.replace( /[\s\S]*?(?:<\/think>|$)/g, "" ); const assistantToolMessage: ChatCompletionMessageParam = { role: "assistant", content: assistantContentForToolMsg, tool_calls: toolCalls, }; const exec = executeToolCalls({ calls, mapping, servers, parseArgs, resolveFileRef, toPrimitive, processToolOutput, abortSignal, }); let toolMsgCount = 0; let toolRunCount = 0; for await (const event of exec) { if (event.type === "update") { yield event.update; } else { messagesOpenAI = [ ...messagesOpenAI, assistantToolMessage, ...(event.summary.toolMessages ?? []), ]; toolMsgCount = event.summary.toolMessages?.length ?? 0; toolRunCount = event.summary.toolRuns?.length ?? 0; logger.info( { toolMsgCount, toolRunCount }, "[mcp] tools executed; continuing loop for follow-up completion" ); } // Check abort during tool execution if (checkAborted()) { logger.info({ loop, toolMsgCount }, "[mcp] aborting during tool execution"); return "aborted"; } } // Check abort after all tools complete before continuing loop if (checkAborted()) { logger.info({ loop }, "[mcp] aborting after tool execution"); return "aborted"; } // Continue loop: next iteration will use tool messages to get the final content continue; } // No tool calls: finalize and return // If a block is still open, close it for the final output if (thinkOpen) { lastAssistantContent += ""; thinkOpen = false; } if (!streamedContent && lastAssistantContent.trim().length > 0) { yield { type: MessageUpdateType.Stream, token: lastAssistantContent }; } yield { type: MessageUpdateType.FinalAnswer, text: lastAssistantContent, interrupted: false, }; logger.info( { length: lastAssistantContent.length, loop }, "[mcp] final answer emitted (no tool_calls)" ); return "completed"; } logger.warn({}, "[mcp] exceeded tool-followup loops; falling back"); } catch (err) { const msg = String(err ?? 
""); const isAbort = (abortSignal && abortSignal.aborted) || msg.includes("AbortError") || msg.includes("APIUserAbortError") || msg.includes("Request was aborted"); if (isAbort) { // Expected on user stop; keep logs quiet and do not treat as error logger.debug({}, "[mcp] aborted by user"); return "aborted"; } logger.warn({ err: msg }, "[mcp] flow failed, falling back to default endpoint"); } finally { // ensure MCP clients are closed after the turn await drainPool(); } return "not_applicable"; } ================================================ FILE: src/lib/server/textGeneration/mcp/toolInvocation.ts ================================================ import { randomUUID } from "crypto"; import { logger } from "../../logger"; import type { MessageUpdate } from "$lib/types/MessageUpdate"; import { MessageToolUpdateType, MessageUpdateType } from "$lib/types/MessageUpdate"; import { ToolResultStatus } from "$lib/types/Tool"; import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; import type { McpToolMapping } from "$lib/server/mcp/tools"; import type { McpServerConfig } from "$lib/server/mcp/httpClient"; import { callMcpTool, getMcpToolTimeoutMs, type McpToolTextResponse, } from "$lib/server/mcp/httpClient"; import { getClient } from "$lib/server/mcp/clientPool"; import { attachFileRefsToArgs, type FileRefResolver } from "./fileRefs"; import type { Client } from "@modelcontextprotocol/sdk/client"; export type Primitive = string | number | boolean; export type ToolRun = { name: string; parameters: Record; output: string; }; export interface NormalizedToolCall { id: string; name: string; arguments: string; } export interface ExecuteToolCallsParams { calls: NormalizedToolCall[]; mapping: Record; servers: McpServerConfig[]; parseArgs: (raw: unknown) => Record; resolveFileRef?: FileRefResolver; toPrimitive: (value: unknown) => Primitive | undefined; processToolOutput: (text: string) => { annotated: string; sources: { index: number; link: string 
}[];
	};
	abortSignal?: AbortSignal;
	toolTimeoutMs?: number;
}

export interface ToolCallExecutionResult {
	toolMessages: ChatCompletionMessageParam[];
	toolRuns: ToolRun[];
	finalAnswer?: { text: string; interrupted: boolean };
}

export type ToolExecutionEvent =
	| { type: "update"; update: MessageUpdate }
	| { type: "complete"; summary: ToolCallExecutionResult };

/** Index server configs by name; entries without a truthy `name` are skipped. */
const serverMap = (servers: McpServerConfig[]): Map<string, McpServerConfig> => {
	const map = new Map<string, McpServerConfig>();
	for (const server of servers) {
		if (server?.name) {
			map.set(server.name, server);
		}
	}
	return map;
};

/**
 * Executes a batch of model-requested tool calls against their MCP servers.
 *
 * All calls are started together; UI updates (call, ETA, progress, result, error)
 * are yielded as `{ type: "update" }` events in the order they are produced. When
 * every task has settled, a single `{ type: "complete" }` event carries the `tool`
 * role messages (one per call that produced a result or an error, in original call
 * order) and the successful tool runs.
 *
 * A failing call does not reject the generator: its error is reported as a tool
 * error update and as an `Error: …` tool message for the model.
 *
 * @param toolTimeoutMs Per-call timeout; defaults to `getMcpToolTimeoutMs()`.
 */
export async function* executeToolCalls({
	calls,
	mapping,
	servers,
	parseArgs,
	resolveFileRef,
	toPrimitive,
	processToolOutput,
	abortSignal,
	toolTimeoutMs,
}: ExecuteToolCallsParams): AsyncGenerator<ToolExecutionEvent, void, undefined> {
	const effectiveTimeoutMs = toolTimeoutMs ?? getMcpToolTimeoutMs();
	const toolMessages: ChatCompletionMessageParam[] = [];
	const toolRuns: ToolRun[] = [];
	const serverLookup = serverMap(servers);

	// Pre-emit call + ETA updates and prepare tasks
	type TaskResult = {
		index: number;
		output?: string;
		structured?: unknown;
		blocks?: unknown[];
		error?: string;
		uuid: string;
		paramsClean: Record<string, Primitive>;
	};

	const prepared = calls.map((call) => {
		const argsObj = parseArgs(call.arguments);
		const paramsClean: Record<string, Primitive> = {};
		for (const [k, v] of Object.entries(argsObj ?? {})) {
			const prim = toPrimitive(v);
			if (prim !== undefined) paramsClean[k] = prim;
		}
		// Attach any resolved image payloads _after_ computing paramsClean so that
		// logging / status updates continue to show only the lightweight primitive
		// arguments (e.g. "image_1") while the full data: URLs or image blobs are
		// only sent to the MCP tool server.
		attachFileRefsToArgs(argsObj, resolveFileRef);
		return { call, argsObj, paramsClean, uuid: randomUUID() };
	});

	for (const p of prepared) {
		yield {
			type: "update",
			update: {
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Call,
				uuid: p.uuid,
				call: { name: p.call.name, parameters: p.paramsClean },
			},
		};
		yield {
			type: "update",
			update: {
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.ETA,
				uuid: p.uuid,
				eta: 10,
			},
		};
	}

	// Preload clients per distinct server used in this batch
	const distinctServerNames = Array.from(
		new Set(prepared.map((p) => mapping[p.call.name]?.server).filter(Boolean) as string[])
	);
	const clientMap = new Map<string, Client>();
	await Promise.all(
		distinctServerNames.map(async (name) => {
			const cfg = serverLookup.get(name);
			if (!cfg) return;
			try {
				const client = await getClient(cfg, abortSignal);
				clientMap.set(name, client);
			} catch (e) {
				logger.warn({ server: name, err: String(e) }, "[mcp] failed to connect client");
			}
		})
	);

	// Async queue to stream results in finish order
	function createQueue<T>() {
		const items: T[] = [];
		const waiters: Array<(v: IteratorResult<T>) => void> = [];
		let closed = false;
		return {
			push(item: T) {
				// Hand the item straight to a pending consumer if there is one; otherwise buffer it.
				const waiter = waiters.shift();
				if (waiter) waiter({ value: item, done: false });
				else items.push(item);
			},
			close() {
				closed = true;
				let waiter: ((v: IteratorResult<T>) => void) | undefined;
				while ((waiter = waiters.shift())) {
					waiter({ value: undefined as unknown as T, done: true });
				}
			},
			async *iterator() {
				for (;;) {
					if (items.length) {
						const first = items.shift();
						if (first !== undefined) yield first as T;
						continue;
					}
					if (closed) return;
					const value: IteratorResult<T> = await new Promise((res) => waiters.push(res));
					if (value.done) return;
					yield value.value as T;
				}
			},
		};
	}

	const updatesQueue = createQueue<MessageUpdate>();
	const results: TaskResult[] = [];

	const tasks = prepared.map(async (p, index) => {
		// Check abort before starting each tool call
		if (abortSignal?.aborted) {
			const message = "Aborted by user";
			results.push({
				index,
				error: message,
				uuid: p.uuid,
				paramsClean: p.paramsClean,
			});
			updatesQueue.push({
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Error,
				uuid: p.uuid,
				message,
			});
			return;
		}

		const mappingEntry = mapping[p.call.name];
		if (!mappingEntry) {
			const message = `Unknown MCP function: ${p.call.name}`;
			results.push({
				index,
				error: message,
				uuid: p.uuid,
				paramsClean: p.paramsClean,
			});
			updatesQueue.push({
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Error,
				uuid: p.uuid,
				message,
			});
			return;
		}
		const serverCfg = serverLookup.get(mappingEntry.server);
		if (!serverCfg) {
			const message = `Unknown MCP server: ${mappingEntry.server}`;
			results.push({
				index,
				error: message,
				uuid: p.uuid,
				paramsClean: p.paramsClean,
			});
			updatesQueue.push({
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Error,
				uuid: p.uuid,
				message,
			});
			return;
		}
		// May be undefined when the preload above failed to connect; passed through as-is.
		const client = clientMap.get(mappingEntry.server);
		try {
			logger.debug(
				{ server: mappingEntry.server, tool: mappingEntry.tool, parameters: p.paramsClean },
				"[mcp] invoking tool"
			);
			const toolResponse: McpToolTextResponse = await callMcpTool(
				serverCfg,
				mappingEntry.tool,
				p.argsObj,
				{
					client,
					signal: abortSignal,
					timeoutMs: effectiveTimeoutMs,
					onProgress: (progress) => {
						updatesQueue.push({
							type: MessageUpdateType.Tool,
							subtype: MessageToolUpdateType.Progress,
							uuid: p.uuid,
							progress: progress.progress,
							total: progress.total,
							message: progress.message,
						});
					},
				}
			);
			const { annotated } = processToolOutput(toolResponse.text ?? "");
			logger.debug(
				{ server: mappingEntry.server, tool: mappingEntry.tool },
				"[mcp] tool call completed"
			);
			results.push({
				index,
				output: annotated,
				structured: toolResponse.structured,
				blocks: toolResponse.content,
				uuid: p.uuid,
				paramsClean: p.paramsClean,
			});
			updatesQueue.push({
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Result,
				uuid: p.uuid,
				result: {
					status: ToolResultStatus.Success,
					call: { name: p.call.name, parameters: p.paramsClean },
					outputs: [
						{
							text: annotated ?? "",
							structured: toolResponse.structured,
							content: toolResponse.content,
						} as unknown as Record<string, unknown>,
					],
					display: true,
				},
			});
		} catch (err) {
			const errMsg = err instanceof Error ? err.message : String(err);
			const errName = err instanceof Error ? err.name : "";
			const isAbortError =
				abortSignal?.aborted ||
				errName === "AbortError" ||
				errName === "APIUserAbortError" ||
				errMsg === "Request was aborted." ||
				errMsg === "This operation was aborted";
			const message = isAbortError ? "Aborted by user" : errMsg;

			if (isAbortError) {
				logger.debug(
					{ server: mappingEntry.server, tool: mappingEntry.tool },
					"[mcp] tool call aborted by user"
				);
			} else {
				logger.warn(
					{ server: mappingEntry.server, tool: mappingEntry.tool, err: message },
					"[mcp] tool call failed"
				);
			}
			results.push({ index, error: message, uuid: p.uuid, paramsClean: p.paramsClean });
			updatesQueue.push({
				type: MessageUpdateType.Tool,
				subtype: MessageToolUpdateType.Error,
				uuid: p.uuid,
				message,
			});
		}
	});

	// kick off and stream as they finish
	// Each task catches its own errors, so closing the queue here ends the loop below.
	Promise.allSettled(tasks).then(() => updatesQueue.close());

	for await (const update of updatesQueue.iterator()) {
		yield { type: "update", update };
	}

	// Collate outputs in original call order
	results.sort((a, b) => a.index - b.index);
	for (const r of results) {
		const name = prepared[r.index].call.name;
		const id = prepared[r.index].call.id;
		if (!r.error) {
			const output = r.output ?? "";
			toolRuns.push({ name, parameters: r.paramsClean, output });
			// For the LLM follow-up call, we keep only the textual output
			toolMessages.push({ role: "tool", tool_call_id: id, content: output });
		} else {
			// Communicate error to LLM so it doesn't hallucinate success
			toolMessages.push({ role: "tool", tool_call_id: id, content: `Error: ${r.error}` });
		}
	}

	yield { type: "complete", summary: { toolMessages, toolRuns } };
}

================================================
FILE: src/lib/server/textGeneration/reasoning.ts
================================================
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
import { MessageUpdateType } from "$lib/types/MessageUpdate";

/**
 * Asks the default endpoint for a one-paragraph summary of `reasoning`.
 *
 * Concatenates every streamed token from `generateFromDefaultEndpoint` and
 * returns the trimmed result.
 */
export async function generateSummaryOfReasoning(
	reasoning: string,
	modelId: string | undefined,
	locals: App.Locals | undefined
): Promise<string> {
	const prompt = `Summarize concisely the following reasoning for the user. Keep it short (one short paragraph).\n\n${reasoning}`;
	const summary = await (async () => {
		const it = generateFromDefaultEndpoint({
			messages: [{ from: "user", content: prompt }],
			modelId,
			locals,
		});
		let out = "";
		for await (const update of it) {
			if (update.type === MessageUpdateType.Stream) out += update.token;
		}
		return out;
	})();
	return summary.trim();
}

================================================
FILE: src/lib/server/textGeneration/title.ts
================================================
import { config } from "$lib/server/config";
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
import { logger } from "$lib/server/logger";
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import type { Conversation } from "$lib/types/Conversation";
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";

/**
 * Yields a single Title update for a conversation that still has the
 * placeholder title "New Chat" and contains at least one user message.
 * Yields nothing otherwise; failures are logged and not rethrown.
 */
export async function* generateTitleForConversation(
	conv: Conversation,
	locals: App.Locals | undefined
): AsyncGenerator<MessageUpdate, undefined, undefined> {
	try {
		const userMessage = conv.messages.find((m) => m.from === "user");
		// HACK: detect if the conversation is new
		if (conv.title !== "New Chat" || !userMessage) return;

		const prompt = userMessage.content;
		const modelForTitle = config.TASK_MODEL?.trim() ? config.TASK_MODEL : conv.model;
		const title = (await generateTitle(prompt, modelForTitle, locals)) ?? "New Chat";

		yield {
			type: MessageUpdateType.Title,
			title,
		};
	} catch (cause) {
		logger.error(cause, "Failed while generating title for conversation");
	}
}

/**
 * Produces a short title for `prompt`.
 *
 * With `LLM_SUMMARIZATION` other than "true", or when the model call fails or
 * returns only whitespace, the title is the first five whitespace-separated
 * words of the prompt.
 */
async function generateTitle(
	prompt: string,
	modelId: string | undefined,
	locals: App.Locals | undefined
) {
	if (config.LLM_SUMMARIZATION !== "true") {
		// When summarization is disabled, use the first five words without adding emojis
		return prompt.split(/\s+/g).slice(0, 5).join(" ");
	}

	// Tools removed: no tool-based title path

	// NOTE(review): the preprompt below reads like a multi-line prompt whose line
	// breaks were collapsed to spaces — confirm against the upstream file.
	return await getReturnFromGenerator(
		generateFromDefaultEndpoint({
			messages: [{ from: "user", content: `User message: "${prompt}"` }],
			preprompt: `You are a chat thread titling assistant. Goal: Produce a very short, descriptive title (2–4 words) that names the topic of the user's first message. Rules: - Output ONLY the title text. No prefixes, labels, quotes, emojis, hashtags, or trailing punctuation. - Use the user's language. - Write a noun phrase that names the topic. Do not write instructions. - Never output just a pronoun (me/you/I/we/us/myself/yourself). Prefer a neutral subject (e.g., "Assistant", "model", or the concrete topic). - Never include meta-words: Summarize, Summary, Title, Prompt, Topic, Subject, About, Question, Request, Chat. Examples: User: "Summarize hello" -> Hello User: "How do I reverse a string in Python?"
-> Python string reversal User: "help me plan a NYC weekend" -> NYC weekend plan User: "请解释Transformer是如何工作的" -> Transformer 工作原理 User: "tell me more about you" -> About the assistant Return only the title text.`,
			generateSettings: {
				max_tokens: 24,
				temperature: 0,
			},
			modelId,
			locals,
		})
	)
		.then((summary) => {
			const firstFive = prompt.split(/\s+/g).slice(0, 5).join(" ");
			const trimmed = String(summary ?? "").trim();
			// Fallback: if empty, return first five words only (no emoji)
			return trimmed || firstFive;
		})
		.catch((e) => {
			logger.error(e, "Error generating title");
			const firstFive = prompt.split(/\s+/g).slice(0, 5).join(" ");
			return firstFive;
		});
}

// No post-processing: rely solely on prompt instructions above

================================================
FILE: src/lib/server/textGeneration/types.ts
================================================
import type { ProcessedModel } from "../models";
import type { Endpoint } from "../endpoints/endpoints";
import type { Conversation } from "$lib/types/Conversation";
import type { Message } from "$lib/types/Message";
import type { Assistant } from "$lib/types/Assistant";

export interface TextGenerationContext {
	model: ProcessedModel;
	endpoint: Endpoint;
	conv: Conversation;
	messages: Message[];
	// NOTE(review): `Pick` appears to have lost its type arguments (presumably
	// `Pick<Assistant, …>` given the unused `Assistant` import) — restore from upstream.
	assistant?: Pick;
	promptedAt: Date;
	ip: string;
	username?: string;
	/** Force-enable multimodal handling for endpoints that support it */
	forceMultimodal?: boolean;
	/** Force-enable tool calling even if model does not advertise support */
	forceTools?: boolean;
	/** Inference provider preference: "auto", "fastest", "cheapest", or a specific provider name */
	provider?: string;
	locals: App.Locals | undefined;
	abortController: AbortController;
}

================================================
FILE: src/lib/server/textGeneration/utils/prepareFiles.ts
================================================
import type { MessageFile } from "$lib/types/Message";
import type { EndpointMessage } from "$lib/server/endpoints/endpoints";
import
type { OpenAI } from "openai";
import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
import type { makeImageProcessor } from "$lib/server/endpoints/images";

/**
 * Prepare chat messages for OpenAI-compatible multimodal payloads.
 * - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled.
 * - Injects text-file content into the user message text.
 * - Leaves messages untouched when no files or multimodal disabled.
 *
 * @param messages - Conversation messages to convert.
 * @param imageProcessor - Processor applied to each image file.
 * @param isMultimodal - When false, image files are ignored.
 * @returns One `{ role, content }` entry per input message, in order.
 */
export async function prepareMessagesWithFiles(
	messages: EndpointMessage[],
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	isMultimodal: boolean
	// NOTE(review): return type arguments were missing in this copy; restored from the visible return shape — confirm.
): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
	return Promise.all(
		messages.map(async (message) => {
			if (message.from === "user" && message.files && message.files.length > 0) {
				const { imageParts, textContent } = await prepareFiles(
					imageProcessor,
					message.files,
					isMultimodal
				);

				// Text attachments are prepended to what the user typed.
				let messageText = message.content;
				if (textContent.length > 0) {
					messageText = textContent + "\n\n" + message.content;
				}

				if (imageParts.length > 0 && isMultimodal) {
					const parts = [{ type: "text" as const, text: messageText }, ...imageParts];
					return { role: message.from, content: parts };
				}

				return { role: message.from, content: messageText };
			}
			return { role: message.from, content: message.content };
		})
	);
}

/**
 * Split attachments into image parts (data URLs) and concatenated text content.
 *
 * @param imageProcessor - Processor applied to each image file.
 * @param files - Attachments of a single message.
 * @param isMultimodal - When false, no image parts are produced.
 * @returns Image parts plus the decoded text of every allow-listed text file.
 */
async function prepareFiles(
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	files: MessageFile[],
	isMultimodal: boolean
): Promise<{
	imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
	textContent: string;
}> {
	// Same null-safe, case-insensitive mime handling as the text filter below.
	const imageFiles = files.filter((file) => (file.mime || "").toLowerCase().startsWith("image/"));

	const textFiles = files.filter((file) => {
		const mime = (file.mime || "").toLowerCase();
		const [fileType, fileSubtype] = mime.split("/");
		// Allow-list entries may use "*" as a wildcard for either half.
		return TEXT_MIME_ALLOWLIST.some((allowed) => {
			const [type, subtype] = allowed.toLowerCase().split("/");
			const typeOk = type === "*" || type === fileType;
			const subOk = subtype === "*" || subtype === fileSubtype;
			return typeOk && subOk;
		});
	});

	let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
	if (isMultimodal && imageFiles.length > 0) {
		const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
		imageParts = processedFiles.map((file) => ({
			type: "image_url" as const,
			image_url: {
				url: `data:${file.mime};base64,${file.image.toString("base64")}`,
				detail: "auto",
			},
		}));
	}

	let textContent = "";
	if (textFiles.length > 0) {
		// File values are base64-encoded; decode as UTF-8.
		// NOTE(review): any wrapper markup around the content may have been lost in this copy — confirm against upstream.
		const textParts = textFiles.map((file) => {
			const content = Buffer.from(file.value, "base64").toString("utf-8");
			return `\n${content}\n`;
		});
		textContent = textParts.join("\n\n");
	}

	return { imageParts, textContent };
}

================================================
FILE: src/lib/server/textGeneration/utils/routing.ts
================================================
import type { EndpointMessage } from "../../endpoints/endpoints";

// Matches a reasoning block from its opening tag up to the closing tag, or to
// the end of the text when the block was never closed. Without the opening
// tag the pattern would match (and delete) the entire text.
const ROUTER_REASONING_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;

/**
 * Remove reasoning blocks from `text`.
 * Returns the input untouched when nothing matched, otherwise the trimmed remainder.
 */
export function stripReasoningBlocks(text: string): string {
	const stripped = text.replace(ROUTER_REASONING_REGEX, "");
	return stripped === text ? text : stripped.trim();
}

/**
 * Copy of `message` without its `reasoning` property and, for string content,
 * without inline reasoning blocks. The input message is not mutated.
 */
export function stripReasoningFromMessageForRouting(message: EndpointMessage): EndpointMessage {
	const clone = { ...message } as EndpointMessage & { reasoning?: string };
	if ("reasoning" in clone) {
		delete clone.reasoning;
	}
	const content =
		typeof message.content === "string" ? stripReasoningBlocks(message.content) : message.content;
	return {
		...clone,
		content,
	};
}

================================================
FILE: src/lib/server/textGeneration/utils/toolPrompt.ts
================================================
import type { OpenAiTool } from "$lib/server/mcp/tools";

/**
 * Build the system-prompt fragment that tells the model which tools exist and
 * how to use them. Returns "" when there are no tools or none has a name.
 *
 * @param tools - Tool definitions; only `function.name` is read.
 * @returns The instruction sentences joined by single spaces.
 */
export function buildToolPreprompt(tools: OpenAiTool[]): string {
	if (!Array.isArray(tools) || tools.length === 0) return "";
	const names = tools
		.map((t) => (t?.function?.name ? String(t.function.name) : ""))
		.filter((s) => s.length > 0);
	if (names.length === 0) return "";

	// Both date renderings use the process's local time zone.
	const now = new Date();
	const currentDate = now.toLocaleDateString("en-US", {
		year: "numeric",
		month: "long",
		day: "numeric",
	});
	const isoDate = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}-${String(now.getDate()).padStart(2, "0")}`;

	return [
		`You have access to these tools: ${names.join(", ")}.`,
		`Today's date: ${currentDate} (${isoDate}).`,
		`IMPORTANT: Do NOT call a tool unless the user's request requires capabilities you lack (e.g., real-time data, image generation, code execution) or external information you do not have. For tasks like writing code, creative writing, math, or building apps, respond directly without tools. When in doubt, do not use a tool.`,
		`PARALLEL TOOL CALLS: When multiple tool calls are needed and they are independent of each other (i.e., one does not need the result of another), call them all at once in a single response instead of one at a time. Only chain tool calls sequentially when a later call depends on an earlier call's output.`,
		`SEARCH: Use 3-6 precise keywords. For historical events, include the year the event occurred. For recent or current topics, use today's year (${now.getFullYear()}). When a tool accepts date-range parameters (e.g., startPublishedDate, endPublishedDate), always use today's date (${isoDate}) as the end date unless the user specifies otherwise. For multi-part questions, search each part separately.`,
		`ANSWER: State only facts explicitly in the results. If info is missing or results conflict, say so. 
Never fabricate URLs or facts.`,
		`INTERACTIVE APPS: When asked to build an interactive application, game, or visualization without a specific language/framework preference, create a single self-contained HTML file with embedded CSS and JavaScript.`,
		`If a tool generates an image, you can inline it directly: ![alt text](image_url).`,
		`If a tool needs an image, set its image field ("input_image", "image", or "image_url") to a reference like "image_1", "image_2", etc. (ordered by when the user uploaded them).`,
		`Default to image references; only use a full http(s) URL when the tool description explicitly asks for one, or reuse a URL a previous tool returned.`,
	].join(" ");
}

================================================
FILE: src/lib/server/urlSafety.ts
================================================
import { Address4, Address6 } from "ip-address";
import { isIP } from "node:net";

// IPv4 ranges that must never be reached through user-supplied URLs:
// "this network", all three RFC 1918 private ranges (10/8, 172.16/12,
// 192.168/16), carrier-grade NAT, loopback and link-local.
const UNSAFE_IPV4_SUBNETS = [
	"0.0.0.0/8",
	"10.0.0.0/8",
	"100.64.0.0/10",
	"127.0.0.0/8",
	"169.254.0.0/16",
	"172.16.0.0/12",
	"192.168.0.0/16",
].map((s) => new Address4(s));

// IPv6 ranges blocked in addition to loopback/link-local: the unspecified
// address and unique-local addresses (the IPv6 counterpart of private IPv4).
const UNSAFE_IPV6_SUBNETS = ["::/128", "fc00::/7"].map((s) => new Address6(s));

/**
 * Whether `address` is an IP literal that points at an internal/private target.
 * Anything that is not a valid IPv4 or IPv6 literal is treated as unsafe.
 */
function isUnsafeIp(address: string): boolean {
	const family = isIP(address);

	if (family === 4) {
		const addr = new Address4(address);
		return UNSAFE_IPV4_SUBNETS.some((subnet) => addr.isInSubnet(subnet));
	}

	if (family === 6) {
		const addr = new Address6(address);
		// Check IPv4-mapped IPv6 addresses (e.g. ::ffff:127.0.0.1)
		if (addr.is4()) {
			const v4 = addr.to4();
			return UNSAFE_IPV4_SUBNETS.some((subnet) => v4.isInSubnet(subnet));
		}
		return (
			addr.isLoopback() ||
			addr.isLinkLocal() ||
			UNSAFE_IPV6_SUBNETS.some((subnet) => addr.isInSubnet(subnet))
		);
	}

	return true; // Unknown format → block
}

/**
 * Synchronous URL validation: checks protocol and hostname string.
* Only `https:` URLs are accepted. `localhost` (with or without a trailing
* dot), any `*.localhost` name, and raw IP literals in an internal range are
* rejected. Other hostnames pass here without DNS resolution.
*
* @param urlString - URL to check; surrounding whitespace is ignored.
* @returns true when the URL may be fetched, false otherwise (including parse errors).
*/
export function isValidUrl(urlString: string): boolean {
	try {
		const url = new URL(urlString.trim());

		if (url.protocol !== "https:") {
			return false;
		}

		// A fully-qualified name may end with a dot ("localhost."); drop it so
		// the comparisons below cannot be bypassed.
		const hostname = url.hostname.toLowerCase().replace(/\.$/, "");
		if (hostname === "localhost" || hostname.endsWith(".localhost")) {
			return false;
		}

		// If the hostname is a raw IP literal, validate it
		// (URL.hostname keeps the brackets around IPv6 literals).
		const cleanHostname = hostname.replace(/^\[|]$/g, "");
		if (isIP(cleanHostname)) {
			return !isUnsafeIp(cleanHostname);
		}

		return true;
	} catch {
		return false;
	}
}

/**
 * Assert that a resolved IP address is safe (not internal/private).
 * Throws if the IP is internal. Used in undici's custom DNS lookup
 * to validate IPs at connection time (prevents TOCTOU DNS rebinding).
 *
 * @param address - Resolved IP address.
 * @param hostname - Hostname that was resolved; only used in the error message.
 * @throws Error when `address` is internal or not a valid IP literal.
 */
export function assertSafeIp(address: string, hostname: string): void {
	if (isUnsafeIp(address)) {
		throw new Error(`Resolved IP for ${hostname} is internal (${address})`);
	}
}

================================================
FILE: src/lib/server/usageLimits.ts
================================================
import { z } from "zod";
import { config } from "$lib/server/config";
import JSON5 from "json5";

/**
 * Normalise a JSON-ish env value: trim it, drop one pair of surrounding
 * backticks, and use `fallback` when nothing is left.
 */
const sanitizeJSONEnv = (val: string, fallback: string) => {
	const raw = (val ?? "").trim();
	const unquoted = raw.startsWith("`") && raw.endsWith("`") ? raw.slice(1, -1) : raw;
	return unquoted || fallback;
};

// RATE_LIMIT is the legacy way to define messages per minute limit
export const usageLimitsSchema = z
	.object({
		conversations: z.coerce.number().optional(), // how many conversations
		messages: z.coerce.number().optional(), // how many messages in a conversation
		messageLength: z.coerce.number().optional(), // how long can a message be before we cut it off
		messagesPerMinute: z
			.preprocess((val) => {
				// Fall back to the legacy RATE_LIMIT setting when not configured here.
				if (val === undefined) {
					return config.RATE_LIMIT;
				}
				return val;
			}, z.coerce.number().optional())
			.optional(), // how many messages per minute
	})
	.optional();

// Parsed once at module load from the USAGE_LIMITS env value ("{}" when unset).
export const usageLimits = usageLimitsSchema.parse(
	JSON5.parse(sanitizeJSONEnv(config.USAGE_LIMITS, "{}"))
);

================================================
FILE: src/lib/stores/backgroundGenerations.svelte.ts
================================================
export type BackgroundGeneration = {
	id: string;
	startedAt: number;
};

// Reactive list of generations currently running in the background.
export const backgroundGenerationEntries = $state<BackgroundGeneration[]>([]);

/** Insert `entry`, or replace the existing entry with the same id. */
export function addBackgroundGeneration(entry: BackgroundGeneration) {
	const index = backgroundGenerationEntries.findIndex(({ id }) => id === entry.id);

	if (index === -1) {
		backgroundGenerationEntries.push(entry);
		return;
	}

	backgroundGenerationEntries[index] = entry;
}

/** Remove the entry with the given id; no-op when it is not present. */
export function removeBackgroundGeneration(id: string) {
	const index = backgroundGenerationEntries.findIndex((entry) => entry.id === id);
	if (index === -1) return;

	backgroundGenerationEntries.splice(index, 1);
}

/** Empty the list in place (the array identity is kept). */
export function clearBackgroundGenerations() {
	backgroundGenerationEntries.length = 0;
}

/** Whether an entry with the given id exists. */
export function hasBackgroundGeneration(id: string) {
	return backgroundGenerationEntries.some((entry) => entry.id === id);
}

================================================
FILE: src/lib/stores/backgroundGenerations.ts
================================================
export * from "./backgroundGenerations.svelte";

================================================
FILE: src/lib/stores/errors.ts
================================================
import { writable } from "svelte/store";

export const ERROR_MESSAGES = {
	default: "Oops, something went wrong.",
	authOnly: "You have to be logged in.",
	rateLimited: "You are sending too many messages. Try again later.",
};

// NOTE(review): the store's type argument appears to be missing in this copy — confirm against upstream.
export const error = writable(undefined);

================================================
FILE: src/lib/stores/isAborted.ts
================================================
import { writable } from "svelte/store";

export const isAborted = writable(false);

================================================
FILE: src/lib/stores/isPro.ts
================================================
import { writable } from "svelte/store";

// null = unknown/loading, true = PRO, false = not PRO
export const isPro = writable<boolean | null>(null);

================================================
FILE: src/lib/stores/loading.ts
================================================
import { writable } from "svelte/store";

export const loading = writable(false);

================================================
FILE: src/lib/stores/mcpServers.ts
================================================
/**
 * MCP Servers Store
 * Manages base (env-configured) and custom (user-added) MCP servers
 * Stores custom servers and selection state in browser localStorage
 */

import { writable, derived, get } from "svelte/store";
import { base } from "$app/paths";
import { env as publicEnv } from "$env/dynamic/public";
import { browser } from "$app/environment";
import type { MCPServer, ServerStatus, MCPTool } from "$lib/types/Tool";

// Namespace storage by app identity to avoid collisions across apps
function toKeyPart(s: string | undefined): string {
	return (s || "").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
}

const appLabel = toKeyPart(publicEnv.PUBLIC_APP_ASSETS || publicEnv.PUBLIC_APP_NAME);
const baseLabel = toKeyPart(typeof base === "string" ? base : "");
// Final prefix format requested: "huggingchat:key" (no mcp:/chat)
const KEY_PREFIX = appLabel || baseLabel || "app";

const STORAGE_KEYS = {
	CUSTOM_SERVERS: `${KEY_PREFIX}:mcp:custom-servers`,
	SELECTED_IDS: `${KEY_PREFIX}:mcp:selected-ids`,
	DISABLED_BASE_IDS: `${KEY_PREFIX}:mcp:disabled-base-ids`,
} as const;

// No migration needed per request — read/write only namespaced keys

// Read a JSON array from localStorage. A missing key or a stored value that is
// not an array yields []; malformed JSON throws and is handled by the caller.
function readStoredArray<T>(key: string): T[] {
	const json = localStorage.getItem(key);
	if (!json) return [];
	const parsed: unknown = JSON.parse(json);
	return Array.isArray(parsed) ? (parsed as T[]) : [];
}

// Load custom servers from localStorage (empty list on the server or on error)
function loadCustomServers(): MCPServer[] {
	if (!browser) return [];

	try {
		return readStoredArray<MCPServer>(STORAGE_KEYS.CUSTOM_SERVERS);
	} catch (error) {
		console.error("Failed to load custom MCP servers from localStorage:", error);
		return [];
	}
}

// Load selected server IDs from localStorage (empty set on the server or on error)
function loadSelectedIds(): Set<string> {
	if (!browser) return new Set();

	try {
		return new Set(readStoredArray<string>(STORAGE_KEYS.SELECTED_IDS));
	} catch (error) {
		console.error("Failed to load selected MCP server IDs from localStorage:", error);
		return new Set();
	}
}

// Save custom servers to localStorage
function saveCustomServers(servers: MCPServer[]) {
	if (!browser) return;

	try {
		localStorage.setItem(STORAGE_KEYS.CUSTOM_SERVERS, JSON.stringify(servers));
	} catch (error) {
		console.error("Failed to save custom MCP servers to localStorage:", error);
	}
}

// Save selected IDs to localStorage
function saveSelectedIds(ids: Set<string>) {
	if (!browser) return;

	try {
		localStorage.setItem(STORAGE_KEYS.SELECTED_IDS, JSON.stringify([...ids]));
	} catch (error) {
		console.error("Failed to save selected MCP server IDs to localStorage:", error);
	}
}

// Load disabled base server IDs from localStorage (empty set if missing or on error)
function loadDisabledBaseIds(): Set<string> {
	if (!browser) return new Set();

	try {
		return new Set(readStoredArray<string>(STORAGE_KEYS.DISABLED_BASE_IDS));
	} catch (error) {
		console.error("Failed to load disabled base MCP server IDs from localStorage:", error);
		return new Set();
	}
}

// Save disabled base server IDs to localStorage
function saveDisabledBaseIds(ids: Set<string>) {
	if (!browser) return;

	try {
		localStorage.setItem(STORAGE_KEYS.DISABLED_BASE_IDS, JSON.stringify([...ids]));
	} catch (error) {
		console.error("Failed to save disabled base MCP server IDs to localStorage:", error);
	}
}

// Store for all servers (base + custom)
export const allMcpServers = writable<MCPServer[]>([]);

// Track if initial server load has completed
export const mcpServersLoaded = writable(false);

// Store for selected server IDs
export const selectedServerIds = writable<Set<string>>(loadSelectedIds());

// Auto-persist selected IDs when they change
if (browser) {
	selectedServerIds.subscribe((ids) => {
		saveSelectedIds(ids);
	});
}

// Derived store: only enabled servers
export const enabledServers = derived([allMcpServers, selectedServerIds], ([$all, $selected]) =>
	$all.filter((s) => $selected.has(s.id))
);

// Derived store: count of enabled servers
export const enabledServersCount = derived(enabledServers, ($enabled) => $enabled.length);

// Derived store: true if all base servers are enabled
export const allBaseServersEnabled = derived(
	[allMcpServers, selectedServerIds],
	([$all, $selected]) => {
		const baseServers = $all.filter((s) => s.type === "base");
		return baseServers.length > 0 && baseServers.every((s) => $selected.has(s.id));
	}
);

// Note: Authorization overlay (with user's HF token) for the Hugging Face MCP host
// is applied server-side when enabled via MCP_FORWARD_HF_USER_TOKEN.
/**
 * Refresh base servers from API and merge with custom servers.
 *
 * On success the selection becomes: every base server that is not recorded as
 * disabled, followed by previously selected non-"base-" ids that still exist.
 * On failure only the locally stored custom servers are published. In both
 * cases `mcpServersLoaded` ends up true.
 */
export async function refreshMcpServers() {
	try {
		const res = await fetch(`${base}/api/mcp/servers`);
		if (!res.ok) throw new Error(`Failed to fetch base servers: ${res.statusText}`);

		const fromApi: MCPServer[] = await res.json();
		const fromStorage = loadCustomServers();

		// Base servers first, then custom ones
		const everyServer = [...fromApi, ...fromStorage];
		allMcpServers.set(everyServer);

		const disabledBase = loadDisabledBaseIds();
		const knownIds = new Set(everyServer.map(({ id }) => id));

		selectedServerIds.update((previous) => {
			// Base servers are on unless the user explicitly turned them off
			const enabledBaseIds = fromApi
				.filter((server) => !disabledBase.has(server.id))
				.map((server) => server.id);

			// Custom selections survive only while the server still exists
			const keptCustomIds = [...previous].filter(
				(id) => knownIds.has(id) && !id.startsWith("base-")
			);

			return new Set([...enabledBaseIds, ...keptCustomIds]);
		});

		mcpServersLoaded.set(true);
	} catch (error) {
		console.error("Failed to refresh MCP servers:", error);
		// Fall back to whatever custom servers are stored locally
		allMcpServers.set(loadCustomServers());
		mcpServersLoaded.set(true);
	}
}

/**
 * Toggle a server on/off.
 * For ids starting with "base-" the persisted disabled list is updated too:
 * the id is added when switching off and removed when switching back on.
 */
export function toggleServer(id: string) {
	selectedServerIds.update((current) => {
		const next = new Set(current);
		const turningOff = next.has(id);

		if (turningOff) {
			next.delete(id);
		} else {
			next.add(id);
		}

		if (id.startsWith("base-")) {
			const disabledBase = loadDisabledBaseIds();
			if (turningOff) {
				disabledBase.add(id);
			} else {
				disabledBase.delete(id);
			}
			saveDisabledBaseIds(disabledBase);
		}

		return next;
	});
}

/**
 * Disable all MCP servers (marks all base servers
* as disabled)
*/
export function disableAllServers() {
	// Get current base server IDs and mark them all as disabled
	const servers = get(allMcpServers);
	const baseServerIds = servers.filter((s) => s.type === "base").map((s) => s.id);

	// Save all base servers as disabled
	saveDisabledBaseIds(new Set(baseServerIds));

	// Clear the selection
	selectedServerIds.set(new Set());
}

/**
 * Add a custom MCP server.
 * The server is stored with a fresh random id, type "custom" and status
 * "disconnected"; a refresh of the server list is started but not awaited.
 *
 * @param server - Server definition without the generated fields.
 * @returns The id assigned to the new server.
 */
export function addCustomServer(server: Omit<MCPServer, "id" | "type" | "status">): string {
	const newServer: MCPServer = {
		...server,
		id: crypto.randomUUID(),
		type: "custom",
		status: "disconnected",
	};

	const customServers = loadCustomServers();
	customServers.push(newServer);
	saveCustomServers(customServers);

	// Refresh all servers to include the new one (fire-and-forget)
	void refreshMcpServers();

	return newServer.id;
}

/**
 * Update an existing custom server by shallow-merging `updates` into it.
 * Does nothing when no stored custom server has the given id.
 */
export function updateCustomServer(id: string, updates: Partial<MCPServer>) {
	const customServers = loadCustomServers();
	const index = customServers.findIndex((s) => s.id === id);

	if (index !== -1) {
		customServers[index] = { ...customServers[index], ...updates };
		saveCustomServers(customServers);
		void refreshMcpServers();
	}
}

/**
 * Delete a custom server: removes it from storage and from the selection,
 * then starts a refresh of the server list.
 */
export function deleteCustomServer(id: string) {
	const customServers = loadCustomServers();
	const filtered = customServers.filter((s) => s.id !== id);
	saveCustomServers(filtered);

	// Also remove from selected IDs
	selectedServerIds.update(($ids) => {
		const newSet = new Set($ids);
		newSet.delete(id);
		return newSet;
	});

	void refreshMcpServers();
}

/**
 * Update server status (from health check).
 * `errorMessage`, `tools` and `authRequired` always overwrite the stored
 * values, so omitting them clears whatever was there before.
 */
export function updateServerStatus(
	id: string,
	status: ServerStatus,
	errorMessage?: string,
	tools?: MCPTool[],
	authRequired?: boolean
) {
	allMcpServers.update(($servers) =>
		$servers.map((s) =>
			s.id === id
				? {
						...s,
						status,
						errorMessage,
						tools,
						authRequired,
					}
				: s
		)
	);
}

/**
 * Run health check on a server.
 * Marks the server "connecting", posts its url/headers to the health API and
 * records the outcome ("connected" with tools, or "error" with a message).
 *
 * @returns `{ ready: true, tools }` on success, `{ ready: false, error }` otherwise; never throws.
 */
export async function healthCheckServer(
	server: MCPServer
): Promise<{ ready: boolean; tools?: MCPTool[]; error?: string }> {
	try {
		updateServerStatus(server.id, "connecting");

		const response = await fetch(`${base}/api/mcp/health`, {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify({ url: server.url, headers: server.headers }),
		});

		const result = await response.json();

		if (result.ready && result.tools) {
			updateServerStatus(server.id, "connected", undefined, result.tools, false);
			return { ready: true, tools: result.tools };
		} else {
			updateServerStatus(server.id, "error", result.error, undefined, Boolean(result.authRequired));
			return { ready: false, error: result.error };
		}
	} catch (error) {
		// Network failures and non-JSON responses both end up here.
		const errorMessage = error instanceof Error ? error.message : "Unknown error";
		updateServerStatus(server.id, "error", errorMessage);
		return { ready: false, error: errorMessage };
	}
}

// Initialize on module load
if (browser) {
	void refreshMcpServers();
}

================================================
FILE: src/lib/stores/pendingChatInput.ts
================================================
import { writable } from "svelte/store";

// NOTE(review): the store's type argument appears to be missing in this copy — confirm against upstream.
export const pendingChatInput = writable(undefined);

================================================
FILE: src/lib/stores/pendingMessage.ts
================================================
import { writable } from "svelte/store";

export const pendingMessage = writable<
	| {
			content: string;
			files: File[];
	  }
	| undefined
>();

================================================
FILE: src/lib/stores/settings.ts
================================================
import { browser } from "$app/environment";
import { invalidate } from "$app/navigation";
import { base } from "$app/paths";
import type { StreamingMode } from "$lib/types/Settings";
import { UrlDependency } from "$lib/types/UrlDependency";
import { getContext, setContext
} from "svelte";
import { type Writable, writable, get } from "svelte/store";

// NOTE(review): the five `Record` fields below appear to have lost their type
// arguments in this copy; they are left as found — confirm against upstream.
type SettingsStore = {
	shareConversationsWithModelAuthors: boolean;
	welcomeModalSeen: boolean;
	welcomeModalSeenAt: Date | null;
	activeModel: string;
	customPrompts: Record;
	multimodalOverrides: Record;
	toolsOverrides: Record;
	hidePromptExamples: Record;
	providerOverrides: Record;
	recentlySaved: boolean;
	streamingMode: StreamingMode;
	directPaste: boolean;
	hapticsEnabled: boolean;
	billingOrganization?: string;
};

type SettingsStoreWritable = Writable<SettingsStore> & {
	instantSet: (settings: Partial<SettingsStore>) => Promise<void>;
	initValue: <K extends keyof SettingsStore>(
		key: K,
		nestedKey: string,
		value: string | boolean
	) => Promise<void>;
};

/** Fetch the settings store previously registered in the Svelte context. */
export function useSettingsStore() {
	return getContext<SettingsStoreWritable>("settings");
}

/**
 * Create the settings store and register it in the Svelte context under "settings".
 *
 * - `set` / `update` merge a partial change and sync to the server after a
 *   300ms debounce, flashing `recentlySaved` for 3s afterwards.
 * - `initValue` fills a missing nested key and syncs the same way, without the flash.
 * - `instantSet` merges and posts immediately.
 *
 * @param initialValue - Initial settings; `recentlySaved` always starts as false.
 * @returns The store object.
 */
export function createSettingsStore(initialValue: Omit<SettingsStore, "recentlySaved">) {
	const baseStore = writable<SettingsStore>({ ...initialValue, recentlySaved: false });

	let timeoutId: ReturnType<typeof setTimeout> | undefined;
	let showSavedOnNextSync = false;

	// Debounced (300ms) POST of the whole store. When a user edit is pending,
	// `recentlySaved` is raised for 3s after a successful sync.
	function scheduleSync() {
		clearTimeout(timeoutId);
		timeoutId = setTimeout(async () => {
			try {
				await fetch(`${base}/settings`, {
					method: "POST",
					headers: {
						"Content-Type": "application/json",
					},
					body: JSON.stringify(get(baseStore)),
				});
			} catch (err) {
				// A failed request must not surface as an unhandled rejection from
				// the timer callback; nothing was saved, so skip the "Saved" flash.
				console.error("Failed to save settings:", err);
				return;
			}

			invalidate(UrlDependency.ConversationList);

			if (showSavedOnNextSync) {
				// set savedRecently to true for 3s
				baseStore.update((s) => ({
					...s,
					recentlySaved: true,
				}));
				setTimeout(() => {
					baseStore.update((s) => ({
						...s,
						recentlySaved: false,
					}));
				}, 3000);
			}

			showSavedOnNextSync = false;
		}, 300); // debounce server calls by 300ms
	}

	async function setSettings(settings: Partial<SettingsStore>) {
		baseStore.update((s) => ({
			...s,
			...settings,
		}));

		if (browser) {
			showSavedOnNextSync = true; // User edit, should show "Saved"
			scheduleSync();
		}
	}

	async function initValue<K extends keyof SettingsStore>(
		key: K,
		nestedKey: string,
		value: string | boolean
	) {
		const currentStore = get(baseStore);
		// NOTE(review): value type assumed from the `value` parameter — confirm against upstream.
		const currentNestedObject = currentStore[key] as Record<string, string | boolean>;

		// Only initialize if undefined
		if (currentNestedObject?.[nestedKey] !== undefined) {
			return;
		}

		// Update the store
		const newNestedObject = {
			...(currentNestedObject || {}),
			[nestedKey]: value,
		};

		baseStore.update((s) => ({
			...s,
			[key]: newNestedObject,
		}));

		// Save to server (debounced) - note: we don't set showSavedOnNextSync
		if (browser) {
			scheduleSync();
		}
	}

	async function instantSet(settings: Partial<SettingsStore>) {
		baseStore.update((s) => ({
			...s,
			...settings,
		}));

		if (browser) {
			await fetch(`${base}/settings`, {
				method: "POST",
				headers: {
					"Content-Type": "application/json",
				},
				body: JSON.stringify({
					...get(baseStore),
					...settings,
				}),
			});
			invalidate(UrlDependency.ConversationList);
		}
	}

	const newStore = {
		subscribe: baseStore.subscribe,
		set: setSettings,
		instantSet,
		initValue,
		update: (fn: (s: SettingsStore) => SettingsStore) => {
			setSettings(fn(get(baseStore)));
		},
	} satisfies SettingsStoreWritable;

	setContext("settings", newStore);

	return newStore;
}

================================================
FILE: src/lib/stores/shareModal.ts
================================================
import { writable } from "svelte/store";

// Boolean store exposed only through open()/close().
function createShareModalStore() {
	const { subscribe, set } = writable(false);

	return {
		subscribe,
		open: () => set(true),
		close: () => set(false),
	};
}

export const shareModal = createShareModalStore();

================================================
FILE: src/lib/stores/titleUpdate.ts
================================================
import { writable } from "svelte/store";

export interface TitleUpdate {
	convId: string;
	title: string;
}

export default writable<TitleUpdate | null>(null);

================================================
FILE: src/lib/switchTheme.ts
================================================
export type ThemePreference = "light" | "dark" | "system";

type ThemeState = {
	preference: ThemePreference;
	isDark: boolean;
};

type ThemeSubscriber = (state: ThemeState) => void;

type ThemeMediaListener = (this: MediaQueryList, ev: MediaQueryListEvent) => void;

let currentPreference: ThemePreference = "system";
const subscribers = new Set();

/** Calls every registered subscriber with the given state. */
function notify(preference: ThemePreference, isDark: boolean) {
	for (const subscriber of subscribers) {
		subscriber({ preference, isDark });
	}
}

/**
 * Registers a theme subscriber and immediately calls it once with the current state
 * (or with `{ preference: "system", isDark: false }` when there is no `document`).
 *
 * @returns a function that removes the subscriber
 */
export function subscribeToTheme(subscriber: ThemeSubscriber) {
	subscribers.add(subscriber);

	if (typeof document !== "undefined") {
		const preference = getThemePreference();
		const isDark = document.documentElement.classList.contains("dark");
		subscriber({ preference, isDark });
	} else {
		subscriber({ preference: "system", isDark: false });
	}

	return () => {
		subscribers.delete(subscriber);
	};
}

/** Updates the `<meta name="theme-color">` content to match the theme, if the tag exists. */
function setMetaThemeColor(isDark: boolean) {
	const metaTheme = document.querySelector('meta[name="theme-color"]') as HTMLMetaElement | null;
	if (!metaTheme) return;
	metaTheme.setAttribute("content", isDark ? "rgb(26, 36, 50)" : "rgb(249, 250, 251)");
}

/** Toggles the `dark` class on `<html>`, updates the meta theme color and notifies subscribers. */
function applyDarkClass(isDark: boolean) {
	const { classList } = document.querySelector("html") as HTMLElement;
	if (isDark) classList.add("dark");
	else classList.remove("dark");
	setMetaThemeColor(isDark);
	notify(currentPreference, isDark);
}

/**
 * Reads the stored theme preference from `localStorage.theme`.
 * Also refreshes the module-level `currentPreference`.
 *
 * @returns the stored preference, or "system" when nothing valid is stored or
 * storage cannot be read
 */
export function getThemePreference(): ThemePreference {
	let raw: string | null = null;
	try {
		raw = typeof localStorage !== "undefined" ? localStorage.getItem("theme") : null;
	} catch (_err) {
		// Reading storage can throw (e.g. access denied by browser settings).
		// Treat it like "nothing stored", mirroring how setTheme ignores write errors.
	}
	if (raw === "light" || raw === "dark" || raw === "system") {
		currentPreference = raw;
		return raw;
	}
	currentPreference = "system";
	return "system";
}

/**
 * Explicitly set the theme preference and apply it immediately.
 * - "light": force light
 * - "dark": force dark
 * - "system": follow the OS preference
 */
export function setTheme(preference: ThemePreference) {
	try {
		localStorage.theme = preference;
	} catch (_err) {
		// ignore write errors
	}

	const mql = window.matchMedia("(prefers-color-scheme: dark)");
	currentPreference = preference;

	const resolve = () =>
		applyDarkClass(preference === "dark" || (preference === "system" && mql.matches));

	// Apply now
	resolve();

	// Older Safari only exposes the deprecated addListener/removeListener pair
	const legacyMql = mql as unknown as {
		addListener?: (l: ThemeMediaListener) => void;
		removeListener?: (l: ThemeMediaListener) => void;
	};

	// Store on window to allow replacing listener later
	const key = "__theme_mql_listener" as const;
	const w = window as unknown as {
		[key: string]: ThemeMediaListener | undefined;
	};

	// Always drop the previous listener; a new one is only installed for "system"
	const existing = w[key];
	if (existing) {
		if (typeof mql.removeEventListener === "function") {
			mql.removeEventListener("change", existing);
		} else {
			// Called as a method so `this` stays bound to the MediaQueryList
			legacyMql.removeListener?.(existing);
		}
		w[key] = undefined;
	}

	if (preference === "system") {
		const listener = () => resolve();
		if (typeof mql.addEventListener === "function") {
			mql.addEventListener("change", listener);
		} else {
			// Called as a method so `this` stays bound to the MediaQueryList
			legacyMql.addListener?.(listener);
		}
		w[key] = listener;
	}
}

// Backward-compatible toggle used by the sidebar button
export function switchTheme() {
	const html = document.querySelector("html") as HTMLElement;
	const isDark = html.classList.contains("dark");
	const next: ThemePreference = isDark ? "light" : "dark";
	setTheme(next);
}

================================================
FILE: src/lib/types/AbortedGeneration.ts
================================================
// Ideally shouldn't be needed, see https://github.com/huggingface/chat-ui/pull/88#issuecomment-1523173850

import type { Conversation } from "./Conversation";
import type { Timestamps } from "./Timestamps";

export interface AbortedGeneration extends Timestamps {
	conversationId: Conversation["_id"];
}

================================================
FILE: src/lib/types/Assistant.ts
================================================
import type { ObjectId } from "mongodb";
import type { User } from "./User";
import type { Timestamps } from "./Timestamps";
import type { ReviewStatus } from "./Review";

export interface Assistant extends Timestamps {
	_id: ObjectId;
	createdById: User["_id"] | string; // user id or session
	createdByName?: User["username"];
	avatar?: string;
	name: string;
	description?: string;
	modelId: string;
	exampleInputs: string[];
	preprompt: string;
	userCount?: number;
	review: ReviewStatus;
	// Web search / RAG removed in this build
	generateSettings?: {
		temperature?: number;
		top_p?: number;
		frequency_penalty?: number;
		top_k?: number;
	};
	dynamicPrompt?: boolean;
	searchTokens: string[];
	last24HoursCount: number;
}

// eslint-disable-next-line no-shadow
// Removed duplicate unused SortKey enum (shared enum exists elsewhere)

================================================
FILE: src/lib/types/AssistantStats.ts
================================================
import type { Timestamps } from "./Timestamps";
import type { Assistant } from "./Assistant";

export interface AssistantStats extends Timestamps {
	assistantId: Assistant["_id"];
	date: {
		at: Date;
		span: "hour";
	};
	count: number;
}

================================================
FILE: src/lib/types/ConfigKey.ts
================================================
export interface ConfigKey {
	key: string; // unique
	value: string;
}
================================================
FILE: src/lib/types/ConvSidebar.ts
================================================
import type { ObjectId } from "bson";

/**
 * Conversation summary: id, title, last update time and optional model / avatar.
 * NOTE(review): presumably the shape rendered in the sidebar list — confirm against callers.
 */
export interface ConvSidebar {
	id: ObjectId | string;
	title: string;
	updatedAt: Date;
	model?: string;
	avatarUrl?: string | Promise;
}

================================================
FILE: src/lib/types/Conversation.ts
================================================
import type { ObjectId } from "mongodb";
import type { Message } from "./Message";
import type { Timestamps } from "./Timestamps";
import type { User } from "./User";
import type { Assistant } from "./Assistant";

/** A conversation record: owner (session and/or user), model, title and its messages. */
export interface Conversation extends Timestamps {
	_id: ObjectId;

	sessionId?: string;
	userId?: User["_id"];

	model: string;

	title: string;
	rootMessageId?: Message["id"];
	messages: Message[];

	meta?: {
		fromShareId?: string;
	};

	preprompt?: string;
	assistantId?: Assistant["_id"];

	userAgent?: string;
}

================================================
FILE: src/lib/types/ConversationStats.ts
================================================
import type { Timestamps } from "./Timestamps";

/** One aggregated count of conversations or messages for a given date bucket. */
export interface ConversationStats extends Timestamps {
	date: {
		at: Date;
		span: "day" | "week" | "month";
		field: "updatedAt" | "createdAt";
	};
	type: "conversation" | "message";
	/** _id => number of conversations/messages in the month */
	distinct: "sessionId" | "userId" | "userOrSessionId" | "_id";
	count: number;
}

================================================
FILE: src/lib/types/Message.ts
================================================
import type { InferenceProvider } from "@huggingface/inference";
import type { MessageUpdate } from "./MessageUpdate";
import type { Timestamps } from "./Timestamps";
import type { v4 } from "uuid";

/** A single chat message, linked to other messages through `ancestors` / `children` ids. */
export type Message = Partial & {
	from: "user" | "assistant" | "system";
	id: ReturnType;
	content: string;
	updates?: MessageUpdate[];

	// Optional server or client-side reasoning content ( blocks)
	reasoning?: string;
	score?: -1 | 0 | 1;
	/**
	 * Either contains the base64 encoded image data
	 * or the hash of the file stored on the server
	 **/
	files?: MessageFile[];
	interrupted?: boolean;

	// Router metadata when using llm-router
	routerMetadata?: {
		route: string;
		model: string;
		provider?: InferenceProvider;
	};

	// needed for conversation trees
	ancestors?: Message["id"][];

	// goes one level deep
	children?: Message["id"][];
};

/** A file attached to a message; `type` tells whether `value` is a hash or base64 data. */
export type MessageFile = {
	type: "hash" | "base64";
	name: string;
	value: string;
	mime: string;
};

================================================
FILE: src/lib/types/MessageEvent.ts
================================================
import type { Session } from "./Session";
import type { Timestamps } from "./Timestamps";
import type { User } from "./User";

/** An event of type "message" or "export", keyed by user id or session id, with an expiry date. */
export interface MessageEvent extends Pick {
	userId: User["_id"] | Session["sessionId"];
	ip?: string;
	expiresAt: Date;
	type: "message" | "export";
}

================================================
FILE: src/lib/types/MessageUpdate.ts
================================================
import type { InferenceProvider } from "@huggingface/inference";
import type { ToolCall, ToolResult } from "$lib/types/Tool";

/** Union of every update variant; discriminate on the `type` field. */
export type MessageUpdate =
	| MessageStatusUpdate
	| MessageTitleUpdate
	| MessageToolUpdate
	| MessageStreamUpdate
	| MessageFileUpdate
	| MessageFinalAnswerUpdate
	| MessageReasoningUpdate
	| MessageRouterMetadataUpdate;

/** Discriminant values for the `type` field of each update variant. */
export enum MessageUpdateType {
	Status = "status",
	Title = "title",
	Tool = "tool",
	Stream = "stream",
	File = "file",
	FinalAnswer = "finalAnswer",
	Reasoning = "reasoning",
	RouterMetadata = "routerMetadata",
}

// Status
export enum MessageUpdateStatus {
	Started = "started",
	Error = "error",
	Finished = "finished",
	KeepAlive = "keepAlive",
}

export interface MessageStatusUpdate {
	type: MessageUpdateType.Status;
	status: MessageUpdateStatus;
	message?: string;
	statusCode?: number;
}

// Everything else
export interface MessageTitleUpdate {
	type: MessageUpdateType.Title;
	title: string;
}

export interface MessageStreamUpdate {
	type: MessageUpdateType.Stream;
	token: string;
	/** Length of the original token. Used for compressed/persisted stream markers where token is empty. */
	len?: number;
}

// Tool updates (for MCP and function calling)
export enum MessageToolUpdateType {
	Call = "call",
	Result = "result",
	Error = "error",
	ETA = "eta",
	Progress = "progress",
}

/** Fields shared by every tool update; `uuid` is common to all of them. */
interface MessageToolUpdateBase {
	type: MessageUpdateType.Tool;
	subtype: TSubtype;
	uuid: string;
}

export interface MessageToolCallUpdate extends MessageToolUpdateBase {
	call: ToolCall;
}

export interface MessageToolResultUpdate extends MessageToolUpdateBase {
	result: ToolResult;
}

export interface MessageToolErrorUpdate extends MessageToolUpdateBase {
	message: string;
}

export interface MessageToolEtaUpdate extends MessageToolUpdateBase {
	eta: number;
}

export interface MessageToolProgressUpdate extends MessageToolUpdateBase {
	progress: number;
	total?: number;
	message?: string;
}

/** Union of all tool update variants. */
export type MessageToolUpdate =
	| MessageToolCallUpdate
	| MessageToolResultUpdate
	| MessageToolErrorUpdate
	| MessageToolEtaUpdate
	| MessageToolProgressUpdate;

export enum MessageReasoningUpdateType {
	Stream = "stream",
	Status = "status",
}

export type MessageReasoningUpdate = MessageReasoningStreamUpdate | MessageReasoningStatusUpdate;

export interface MessageReasoningStreamUpdate {
	type: MessageUpdateType.Reasoning;
	subtype: MessageReasoningUpdateType.Stream;
	token: string;
}

export interface MessageReasoningStatusUpdate {
	type: MessageUpdateType.Reasoning;
	subtype: MessageReasoningUpdateType.Status;
	status: string;
}

export interface MessageFileUpdate {
	type: MessageUpdateType.File;
	name: string;
	sha: string;
	mime: string;
}

export interface MessageFinalAnswerUpdate {
	type: MessageUpdateType.FinalAnswer;
	text: string;
	interrupted: boolean;
}

/** Carries the same `route` / `model` / `provider` fields as `Message.routerMetadata`. */
export interface MessageRouterMetadataUpdate {
	type: MessageUpdateType.RouterMetadata;
	route: string;
	model: string;
	provider?: InferenceProvider;
}
================================================
FILE: src/lib/types/MigrationResult.ts
================================================
import type { ObjectId } from "mongodb";

/** Name and outcome ("success", "failure" or "ongoing") of a migration. */
export interface MigrationResult {
	_id: ObjectId;
	name: string;
	status: "success" | "failure" | "ongoing";
}

================================================
FILE: src/lib/types/Model.ts
================================================
import type { BackendModel } from "$lib/server/models";

/** Subset of `BackendModel` fields, picked by name below. */
export type Model = Pick<
	BackendModel,
	| "id"
	| "name"
	| "displayName"
	| "isRouter"
	| "websiteUrl"
	| "datasetName"
	| "promptExamples"
	| "parameters"
	| "description"
	| "logoUrl"
	| "modelUrl"
	| "datasetUrl"
	| "preprompt"
	| "multimodal"
	| "multimodalAcceptedMimetypes"
	| "unlisted"
	| "hasInferenceAPI"
	| "providers"
>;

================================================
FILE: src/lib/types/Report.ts
================================================
import type { ObjectId } from "mongodb";
import type { User } from "./User";
import type { Assistant } from "./Assistant";
import type { Timestamps } from "./Timestamps";

/** A report filed against an "assistant" or "tool", with an optional reason. */
export interface Report extends Timestamps {
	_id: ObjectId;
	createdBy: User["_id"] | string;
	object: "assistant" | "tool";
	contentId: Assistant["_id"];
	reason?: string;
}

================================================
FILE: src/lib/types/Review.ts
================================================
export enum ReviewStatus {
	PRIVATE = "PRIVATE",
	PENDING = "PENDING",
	APPROVED = "APPROVED",
	DENIED = "DENIED",
}

================================================
FILE: src/lib/types/Semaphore.ts
================================================
import type { Timestamps } from "./Timestamps";

/** A lock record identified by `key`, carrying a `deleteAt` date. */
export interface Semaphore extends Timestamps {
	key: string;
	deleteAt: Date;
}

/** Known lock names. */
export enum Semaphores {
	CONVERSATION_STATS = "conversation.stats",
	CONFIG_UPDATE = "config.update",
	MIGRATION = "migration",
	TEST_MIGRATION = "test.migration",
	/**
	 * Note this lock name is used as `${Semaphores.OAUTH_TOKEN_REFRESH}:${sessionId}`
	 *
	 * not a global lock, but a lock for each session
	 */
	OAUTH_TOKEN_REFRESH = "oauth.token.refresh",
}

================================================
FILE: src/lib/types/Session.ts
================================================
import type { ObjectId } from "bson";
import type { Timestamps } from "./Timestamps";
import type { User } from "./User";

/** A session bound to a user, with an expiry date and optional OAuth token data. */
export interface Session extends Timestamps {
	_id: ObjectId;
	sessionId: string;
	userId: User["_id"];
	userAgent?: string;
	ip?: string;
	expiresAt: Date;
	admin?: boolean;
	coupledCookieHash?: string;

	oauth?: {
		token: {
			value: string;
			expiresAt: Date;
		};
		refreshToken?: string;
	};
}

================================================
FILE: src/lib/types/Settings.ts
================================================
import { defaultModel } from "$lib/server/models";
import type { Timestamps } from "./Timestamps";
import type { User } from "./User";

export type StreamingMode = "raw" | "smooth";

/** Per-user or per-session settings record. */
export interface Settings extends Timestamps {
	userId?: User["_id"];
	sessionId?: string;

	shareConversationsWithModelAuthors: boolean;
	/** One-time welcome modal acknowledgement */
	welcomeModalSeenAt?: Date | null;
	activeModel: string;

	// model name and system prompts
	customPrompts?: Record;

	/**
	 * Per‑model overrides to enable multimodal (image) support
	 * even when not advertised by the provider/model list.
	 * Only the `true` value is meaningful (enables images).
	 */
	multimodalOverrides?: Record;

	/**
	 * Per‑model overrides to enable tool calling (OpenAI tools/function calling)
	 * even when not advertised by the provider list. Only `true` is meaningful.
	 */
	toolsOverrides?: Record;

	/**
	 * Per-model toggle to hide Omni prompt suggestions shown near the composer.
	 * When set to `true`, prompt examples for that model are suppressed.
	 */
	hidePromptExamples?: Record;

	/**
	 * Per-model inference provider preference.
	 * Values: "auto" (default), "fastest", "cheapest", or a specific provider name (e.g., "together", "sambanova").
	 * The value is appended to the model ID when making inference requests (e.g., "model:fastest").
	 */
	providerOverrides?: Record;

	/**
	 * Preferred assistant output behavior in the chat UI.
	 * - "raw": show provider-native stream chunks
	 * - "smooth": show smoothed stream chunks
	 */
	streamingMode: StreamingMode;
	directPaste: boolean;

	/**
	 * Whether haptic feedback is enabled on supported touch devices.
	 * Uses the web-haptics library (imported by src/lib/utils/haptics.ts).
	 */
	hapticsEnabled: boolean;

	/**
	 * Organization to bill inference requests to (HuggingChat only).
	 * Stores the org's preferred_username. If empty/undefined, bills to personal account.
	 */
	billingOrganization?: string;
}

export type SettingsEditable = Omit;

// TODO: move this to a constant file along with other constants
export const DEFAULT_SETTINGS = {
	shareConversationsWithModelAuthors: true,
	activeModel: defaultModel.id,
	customPrompts: {},
	multimodalOverrides: {},
	toolsOverrides: {},
	hidePromptExamples: {},
	providerOverrides: {},
	streamingMode: "smooth",
	directPaste: false,
	hapticsEnabled: true,
} satisfies SettingsEditable;

================================================
FILE: src/lib/types/SharedConversation.ts
================================================
import type { Conversation } from "./Conversation";

/** Selected `Conversation` fields plus a string `_id` and a `hash`. */
export type SharedConversation = Pick<
	Conversation,
	"model" | "title" | "rootMessageId" | "messages" | "preprompt" | "createdAt" | "updatedAt"
> & {
	_id: string;
	hash: string;
};

================================================
FILE: src/lib/types/Template.ts
================================================
import type { Message } from "./Message";

/** Messages plus an optional preprompt. */
export type ChatTemplateInput = {
	messages: Pick[];
	preprompt?: string;
};

================================================
FILE: src/lib/types/Timestamps.ts
================================================
/** Creation / last-update dates mixed into the record types that extend it. */
export interface Timestamps {
	createdAt: Date;
	updatedAt: Date;
}

================================================
FILE: src/lib/types/TokenCache.ts
================================================
import type { Timestamps } from "./Timestamps";

/** Maps a hashed bearer token to the matching user id. */
export interface TokenCache extends Timestamps {
	tokenHash: string; // sha256 of the bearer token
	userId: string; // the matching hf user id
}

================================================
FILE: src/lib/types/Tool.ts
================================================
export enum ToolResultStatus {
	Success = "success",
	Error = "error",
}

/** A tool invocation: tool name, parameters and an optional tool id. */
export interface ToolCall {
	name: string;
	parameters: Record;
	toolId?: string;
}

export interface ToolResultSuccess {
	status: ToolResultStatus.Success;
	call: ToolCall;
	outputs: Record[];
	display?: boolean;
}

export interface ToolResultError {
	status: ToolResultStatus.Error;
	call: ToolCall;
	message: string;
	display?: boolean;
}

/** Outcome of a tool call; discriminate on `status`. */
export type ToolResult = ToolResultSuccess | ToolResultError;

export interface ToolFront {
	_id: string;
	name: string;
	displayName?: string;
	description?: string;
	color?: string;
	icon?: string;
	type?: "config" | "community";
	isOnByDefault?: boolean;
	isLocked?: boolean;
	mimeTypes?: string[];
	timeToUseMS?: number;
}

// MCP Server types
export interface KeyValuePair {
	key: string;
	value: string;
}

export type ServerStatus = "connected" | "connecting" | "disconnected" | "error";

export interface MCPTool {
	name: string;
	description?: string;
	inputSchema?: unknown;
}

export interface MCPServer {
	id: string;
	name: string;
	url: string;
	type: "base" | "custom";
	headers?: KeyValuePair[];
	env?: KeyValuePair[];
	status?: ServerStatus;
	isLocked?: boolean;
	tools?: MCPTool[];
	errorMessage?: string;
	// Indicates server reports or appears to require OAuth or other auth
	authRequired?: boolean;
}

export interface MCPServerApi {
	url: string;
	headers?: KeyValuePair[];
}

================================================
FILE: src/lib/types/UrlDependency.ts
================================================
/* eslint-disable no-shadow
*/
export enum UrlDependency {
	ConversationList = "conversation:list",
	Conversation = "conversation:id",
}

================================================
FILE: src/lib/types/User.ts
================================================
import type { ObjectId } from "mongodb";
import type { Timestamps } from "./Timestamps";

export interface User extends Timestamps {
	_id: ObjectId;

	username?: string;
	name: string;
	email?: string;
	avatarUrl: string | undefined;
	hfUserId: string;
	isAdmin?: boolean;
	isEarlyAccess?: boolean;
}

================================================
FILE: src/lib/utils/PublicConfig.svelte.ts
================================================
import type { env as publicEnv } from "$env/dynamic/public";
import { page } from "$app/state";
import { base } from "$app/paths";

import type { Transporter } from "@sveltejs/kit";
import { getContext } from "svelte";

type PublicConfigKey = keyof typeof publicEnv;

/** Holds the public config values and exposes a few derived getters. */
class PublicConfigManager {
	#configStore = $state>({});

	constructor(initialConfig?: Record) {
		this.init = this.init.bind(this);
		this.getPublicConfig = this.getPublicConfig.bind(this);
		if (initialConfig) {
			this.init(initialConfig);
		}
	}

	init(publicConfig: Record) {
		this.#configStore = publicConfig;
	}

	get(key: PublicConfigKey) {
		return this.#configStore[key];
	}

	getPublicConfig() {
		return this.#configStore;
	}

	get isHuggingChat() {
		return this.#configStore.PUBLIC_APP_ASSETS === "huggingchat";
	}

	get assetPath() {
		return (
			(this.#configStore.PUBLIC_ORIGIN || page.url.origin) +
			base +
			"/" +
			(this.#configStore.PUBLIC_APP_ASSETS || "chatui")
		);
	}
}

type ConfigProxy = PublicConfigManager & { [K in PublicConfigKey]: string };

/**
 * Builds a `PublicConfigManager` wrapped in a Proxy: properties that exist on the
 * manager are served as-is, any other string key is looked up with `manager.get(key)`.
 * Writes to keys that are not on the manager are rejected.
 */
export function getConfigManager(initialConfig?: Record) {
	const publicConfigManager = new PublicConfigManager(initialConfig);

	const publicConfig: ConfigProxy = new Proxy(publicConfigManager, {
		get(target, prop) {
			if (prop in target) {
				return Reflect.get(target, prop);
			}
			if (typeof prop === "string") {
				return target.get(prop as PublicConfigKey);
			}
			return undefined;
		},
		set(target, prop, value, receiver) {
			if (prop in target) {
				return Reflect.set(target, prop, value, receiver);
			}
			return false;
		},
	}) as ConfigProxy;
	return publicConfig;
}

export const publicConfigTransporter: Transporter = {
	encode: (value) =>
		value instanceof PublicConfigManager ? JSON.stringify(value.getPublicConfig()) : false,
	decode: (value) => getConfigManager(JSON.parse(value)),
};

export const usePublicConfig = () => getContext("publicConfig");

================================================
FILE: src/lib/utils/auth.ts
================================================
import { goto } from "$app/navigation";
import { base } from "$app/paths";
import { page } from "$app/state";

/**
 * Redirects to the login page if the user is not authenticated
 * and the login feature is enabled.
 */
export function requireAuthUser(): boolean {
	if (page.data.loginEnabled && !page.data.user) {
		const next = page.url.pathname + page.url.search;
		const url = `${base}/login?next=${encodeURIComponent(next)}`;
		goto(url, { invalidateAll: true });
		return true;
	}
	return false;
}

================================================
FILE: src/lib/utils/chunk.ts
================================================
/**
 * Chunk array into arrays of length at most `chunkSize`
 *
 * @param chunkSize must be greater than or equal to 1
 * @returns the chunks in order; `[]` for empty input, `[arr]` when it already fits
 * @throws RangeError when `chunkSize` is NaN or lower than 1
 */
export function chunk(arr: T, chunkSize: number): T[] {
	// `!(x >= 1)` rejects NaN as well as values below 1, without the coercing global isNaN
	if (!(chunkSize >= 1)) {
		throw new RangeError(`Invalid chunk size: ${chunkSize}`);
	}

	if (!arr.length) {
		return [];
	}

	/// Small optimization to not chunk buffers unless needed
	if (arr.length <= chunkSize) {
		return [arr];
	}

	return Array.from({ length: Math.ceil(arr.length / chunkSize) }, (_, i) =>
		arr.slice(i * chunkSize, (i + 1) * chunkSize)
	) as T[];
}

================================================
FILE: src/lib/utils/cookiesAreEnabled.ts
================================================
import { browser } from "$app/environment";

/**
 * Tells whether cookies can be used. Always false outside the browser.
 * When `navigator.cookieEnabled` is not truthy, falls back to writing a test cookie
 * and checking that it can be read back.
 */
export function cookiesAreEnabled(): boolean {
	if (!browser) return false;
	if (navigator.cookieEnabled) return true;

	// Create cookie
	document.cookie = "cookietest=1";
	const ret = document.cookie.includes("cookietest=");
	// Delete cookie
	document.cookie = "cookietest=1; expires=Thu, 01-Jan-1970 00:00:01 GMT";
	return ret;
}

================================================
FILE: src/lib/utils/debounce.ts
================================================
/**
 * A debounce function that works in both browser and Nodejs.
 * For pure Nodejs work, prefer the `Debouncer` class.
 */
export function debounce(
	callback: (...rest: T) => unknown,
	limit: number
): (...rest: T) => void {
	let timer: ReturnType;

	return function (...rest) {
		clearTimeout(timer);
		timer = setTimeout(() => {
			callback(...rest);
		}, limit);
	};
}

================================================
FILE: src/lib/utils/deepestChild.ts
================================================
/** Follows `lastElementChild` recursively and returns the innermost element reached. */
export function deepestChild(el: HTMLElement): HTMLElement {
	if (el.lastElementChild && el.lastElementChild.nodeType !== Node.TEXT_NODE) {
		return deepestChild(el.lastElementChild as HTMLElement);
	}
	return el;
}

================================================
FILE: src/lib/utils/favicon.ts
================================================
/**
 * Generates a Google favicon URL for the given server URL
 * @param serverUrl - The MCP server URL (e.g., "https://mcp.exa.ai/mcp")
 * @param size - The size of the favicon in pixels (default: 64)
 * @returns The Google favicon service URL
 */
export function getMcpServerFaviconUrl(serverUrl: string, size: number = 64): string {
	try {
		const parsed = new URL(serverUrl);
		const { hostname } = parsed;

		// IP literals have no root domain: keep them whole instead of slicing
		// "192.168.1.10" down to "1.10". IPv6 hostnames are returned in brackets.
		const isIpLiteral = hostname.startsWith("[") || /^\d{1,3}(\.\d{1,3}){3}$/.test(hostname);

		// Extract root domain (e.g., "exa.ai" from "mcp.exa.ai")
		// Google's favicon service needs the root domain, not subdomains
		// NOTE: keeping the last two labels is a heuristic; multi-label suffixes
		// such as "example.co.uk" are reduced to "co.uk".
		const hostnameParts = hostname.split(".");
		const rootDomain =
			!isIpLiteral && hostnameParts.length >= 2 ? hostnameParts.slice(-2).join(".") : hostname;
		const domain = `${parsed.protocol}//${rootDomain}`;
		return `https://www.google.com/s2/favicons?sz=${size}&domain_url=${encodeURIComponent(domain)}`;
	} catch {
		// If URL parsing fails, just use the raw serverUrl - Google will handle it
		return `https://www.google.com/s2/favicons?sz=${size}&domain_url=${encodeURIComponent(serverUrl)}`;
	}
}

================================================
FILE: src/lib/utils/fetchJSON.ts
================================================
/**
 * Fetches `url` and parses the response body as JSON.
 *
 * @param options.fetch fetch implementation to use instead of the global one
 * @param options.allowNull when true, an empty body resolves to null instead of throwing
 * @throws Error when the response is not ok, or when the body is empty and `allowNull` is not set
 */
export async function fetchJSON(
	url: string,
	options?: {
		fetch?: typeof window.fetch;
		allowNull?: boolean;
	}
): Promise {
	const response = await (options?.fetch ?? fetch)(url);
	if (!response.ok) {
		throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
	}

	// Handle empty responses (which parse to null)
	const text = await response.text();
	if (!text || text.trim() === "") {
		if (options?.allowNull) {
			return null as T;
		}
		throw new Error(`Received empty response from ${url} but allowNull is not set to true`);
	}

	return JSON.parse(text);
}

================================================
FILE: src/lib/utils/file2base64.ts
================================================
/** Reads a file as a data URL and resolves with the part after the first comma (the base64 payload). */
const file2base64 = (file: File): Promise => {
	return new Promise((resolve, reject) => {
		const reader = new FileReader();
		reader.readAsDataURL(file);
		reader.onload = () => {
			const dataUrl = reader.result as string;
			const base64 = dataUrl.split(",")[1];
			resolve(base64);
		};
		reader.onerror = (error) => reject(error);
	});
};

export default file2base64;

================================================
FILE: src/lib/utils/formatUserCount.ts
================================================
/** Maps a user count to a coarse label ("10+", "1k+", ...); returns "" when no range matches. */
export function formatUserCount(userCount: number): string {
	const userCountRanges: { min: number; max: number; label: string }[] = [
		{ min: 0, max: 1, label: "1" },
		{ min: 2, max: 9, label: "1-10" },
		{ min: 10, max: 49, label: "10+" },
		{ min: 50, max: 99, label: "50+" },
		{ min: 100, max: 299, label: "100+" },
		{ min: 300, max: 499, label: "300+" },
		{ min: 500, max: 999, label: "500+" },
		{ min: 1_000, max: 2_999, label: "1k+" },
		{ min: 3_000, max: 4_999, label: "3k+" },
		{ min: 5_000, max: 9_999, label: "5k+" },
		{ min: 10_000, max: 19_999, label: "10k+" },
		{ min: 20_000, max: 29_999, label: "20k+" },
		{ min: 30_000, max: 39_999, label: "30k+" },
		{ min: 40_000, max: 49_999, label: "40k+" },
		{ min: 50_000, max: 59_999, label: "50k+" },
		{ min: 60_000, max: 69_999, label: "60k+" },
		{ min: 70_000, max: 79_999, label: "70k+" },
		{ min: 80_000, max: 89_999, label: "80k+" },
		{ min: 90_000, max: 99_999, label: "90k+" },
		{ min: 100_000, max: 109_999, label: "100k+" },
		{ min: 110_000, max: 119_999, label: "110k+" },
		{ min: 120_000, max: 129_999, label: "120k+" },
		{ min: 130_000, max: 139_999, label: "130k+" },
		{ min: 140_000, max: 149_999, label: "140k+" },
		{ min: 150_000, max: 199_999, label: "150k+" },
		{ min: 200_000, max: 299_999, label: "200k+" },
		{ min: 300_000, max: 499_999, label: "300k+" },
		{ min: 500_000, max: 749_999, label: "500k+" },
		{ min: 750_000, max: 999_999, label: "750k+" },
		{ min: 1_000_000, max: Infinity, label: "1M+" },
	];

	const range = userCountRanges.find(({ min, max }) => userCount >= min && userCount <= max);
	return range?.label ?? "";
}

================================================
FILE: src/lib/utils/generationState.spec.ts
================================================
import { describe, expect, test } from "vitest";
import type { Message } from "$lib/types/Message";
import { MessageUpdateStatus, MessageUpdateType } from "$lib/types/MessageUpdate";
import { isAssistantGenerationTerminal, isConversationGenerationActive } from "./generationState";

function assistantMessage(overrides: Partial = {}): Message {
	return {
		from: "assistant",
		id: "assistant-1" as Message["id"],
		content: "",
		children: [],
		...overrides,
	};
}

describe("generationState", () => {
	test("returns active when assistant has no terminal update", () => {
		const messages = [
			assistantMessage({
				updates: [{ type: MessageUpdateType.Stream, token: "Hello" }],
			}),
		];

		expect(isConversationGenerationActive(messages)).toBe(true);
	});

	test("treats final answer update as terminal", () => {
		const message = assistantMessage({
			updates: [{ type: MessageUpdateType.FinalAnswer, text: "Done", interrupted: false }],
		});

		expect(isAssistantGenerationTerminal(message)).toBe(true);
		expect(isConversationGenerationActive([message])).toBe(false);
	});

	test("treats error status update as terminal", () => {
		const message = assistantMessage({
			updates: [
				{
					type: MessageUpdateType.Status,
					status: MessageUpdateStatus.Error,
					message: "Something went wrong",
				},
			],
		});

		expect(isAssistantGenerationTerminal(message)).toBe(true);
		expect(isConversationGenerationActive([message])).toBe(false);
	});

	test("treats finished status update as terminal", () => {
		const message = assistantMessage({
			updates: [
				{
					type: MessageUpdateType.Status,
					status: MessageUpdateStatus.Finished,
				},
			],
		});

		expect(isAssistantGenerationTerminal(message)).toBe(true);
		expect(isConversationGenerationActive([message])).toBe(false);
	});

	test("treats interrupted assistant message as terminal", () => {
		const message = assistantMessage({
			interrupted: true,
			updates: [{ type: MessageUpdateType.Stream, token: "partial" }],
		});

		expect(isAssistantGenerationTerminal(message)).toBe(true);
		expect(isConversationGenerationActive([message])).toBe(false);
	});
});

================================================
FILE: src/lib/utils/generationState.ts
================================================
import type { Message } from "$lib/types/Message";
import { MessageUpdateStatus, MessageUpdateType } from "$lib/types/MessageUpdate";

/**
 * Whether generation for this message is over: true for a missing or non-assistant
 * message, an interrupted message, or one holding a final-answer update or an
 * error/finished status update.
 */
export function isAssistantGenerationTerminal(message?: Message): boolean {
	if (!message || message.from !== "assistant") return true;
	if (message.interrupted === true) return true;

	const updates = message.updates ?? [];
	const hasFinalAnswer = updates.some((update) => update.type === MessageUpdateType.FinalAnswer);
	if (hasFinalAnswer) return true;

	return updates.some(
		(update) =>
			update.type === MessageUpdateType.Status &&
			(update.status === MessageUpdateStatus.Error || update.status === MessageUpdateStatus.Finished)
	);
}

/** True when the last assistant message in `messages` exists and is not terminal. */
export function isConversationGenerationActive(messages: Message[]): boolean {
	const lastAssistant = [...messages].reverse().find((message) => message.from === "assistant");
	if (!lastAssistant) return false;
	return !isAssistantGenerationTerminal(lastAssistant);
}

================================================
FILE: src/lib/utils/getHref.ts
================================================
/**
 * Returns `url` as a string after editing its query parameters.
 *
 * @param modifications.existingKeys "delete" removes the listed keys, "delete_except" removes every other key
 * @param modifications.newKeys keys to set; a falsy value removes the key instead
 */
export function getHref(
	url: URL | string,
	modifications: {
		newKeys?: Record;
		existingKeys?: { behaviour: "delete_except" | "delete"; keys: string[] };
	}
) {
	const newUrl = new URL(url);
	const { newKeys, existingKeys } = modifications;

	// existing keys logic
	if (existingKeys) {
		const { behaviour, keys } = existingKeys;
		if (behaviour === "delete") {
			for (const key of keys) {
				newUrl.searchParams.delete(key);
			}
		} else {
			// delete_except
			const keysToPreserve = keys;
			for (const key of [...newUrl.searchParams.keys()]) {
				if (!keysToPreserve.includes(key)) {
					newUrl.searchParams.delete(key);
				}
			}
		}
	}

	// new keys logic
	if (newKeys) {
		for (const [key, val] of Object.entries(newKeys)) {
			if (val) {
				newUrl.searchParams.set(key, val);
			} else {
				newUrl.searchParams.delete(key);
			}
		}
	}

	return newUrl.toString();
}

================================================
FILE: src/lib/utils/getReturnFromGenerator.ts
================================================
/** Drains an async generator, discarding yielded values, and resolves with its return value. */
export async function getReturnFromGenerator(generator: AsyncGenerator): Promise {
	let result: IteratorResult;
	do {
		result = await generator.next();
	} while (!result.done); // Keep calling `next()` until `done` is true
	return result.value; // Return the final value
}

================================================
FILE: src/lib/utils/haptics.ts
================================================
import { browser } from "$app/environment";
import type { WebHaptics } from "web-haptics";

let instance: WebHaptics | null = null;
let enabled = true;

/**
 * Lazily initializes the WebHaptics instance on first use.
 * Avoids importing at module level so SSR doesn't break.
 */
async function getInstance(): Promise {
	if (!browser || !supportsHaptics()) return null;
	if (instance) return instance;
	try {
		const { WebHaptics: WH } = await import("web-haptics");
		instance = new WH();
		return instance;
	} catch {
		return null;
	}
}

/** Call from the settings store to keep haptics in sync with user preference. */
export function setHapticsEnabled(value: boolean) {
	enabled = value;
}

/** Whether the device likely supports haptic feedback (touch screen present). */
export function supportsHaptics(): boolean {
	return browser && navigator.maxTouchPoints > 0;
}

// ── Internals ────────────────────────────────────────────────────────

/** Fire a haptic pattern, swallowing errors so callers can safely fire-and-forget. */
function fire(pattern: string): void {
	if (!enabled) return;
	Promise.resolve(getInstance())
		.then((h) => h?.trigger(pattern))
		.catch(() => {});
}

// ── Semantic haptic actions ──────────────────────────────────────────

/** Light tap — for routine actions (send message, toggle, navigate).
*/ export function tap() { fire("light"); } /** Success confirmation — double-tap pattern (copy, share, save). */ export function confirm() { fire("success"); } /** Error / destructive warning — three rapid taps (delete, stop generation). */ export function error() { fire("error"); } /** Selection change — subtle tap for pickers and selections. */ export function selection() { fire("selection"); } /** Stream start burst — multiple short vibrations for a "machine starting up" feel. */ export function streamStart(): void { if (!enabled || !browser) return; if (typeof navigator.vibrate !== "function") return; // Three quick pulses: two short taps + a slightly longer finish navigator.vibrate([50, 30, 50, 30, 80]); } ================================================ FILE: src/lib/utils/hashConv.ts ================================================ import type { Conversation } from "$lib/types/Conversation"; import { sha256 } from "./sha256"; export async function hashConv(conv: Conversation) { // messages contains the conversation message but only the immutable part const messages = conv.messages.map((message) => { return (({ from, id, content }) => ({ from, id, content }))(message); }); const hash = await sha256(JSON.stringify(messages)); return hash; } ================================================ FILE: src/lib/utils/hf.ts ================================================ // Client-safe HF utilities used in UI components export function isStrictHfMcpLogin(urlString: string): boolean { try { const u = new URL(urlString); const host = u.hostname.toLowerCase(); const allowedHosts = new Set(["hf.co", "huggingface.co"]); return ( u.protocol === "https:" && allowedHosts.has(host) && u.pathname === "/mcp" && u.search === "?login" ); } catch { return false; } } ================================================ FILE: src/lib/utils/isDesktop.ts ================================================ // Approximate width from which we disable autofocus const TABLET_VIEWPORT_WIDTH = 768; 
/**
 * Reports whether the viewport is wider than the tablet breakpoint.
 *
 * @param window - Window whose `innerWidth` is compared with `TABLET_VIEWPORT_WIDTH`.
 * @returns `true` when `innerWidth` is strictly greater than the breakpoint.
 */
export function isDesktop(window: Window) {
  const { innerWidth } = window;
  return innerWidth > TABLET_VIEWPORT_WIDTH;
}
================================================
FILE: src/lib/utils/isUrl.ts
================================================
/**
 * Reports whether `url` can be parsed by the `URL` constructor.
 *
 * @param url - Candidate string; relative URLs fail because no base is supplied.
 * @returns `true` when `new URL(url)` succeeds, `false` when it throws.
 */
export function isURL(url: string) {
  try {
    new URL(url);
    return true;
  } catch {
    return false;
  }
}
================================================
FILE: src/lib/utils/isVirtualKeyboard.ts
================================================
import { browser } from "$app/environment";

/**
 * Heuristically reports whether the device is likely to use an on-screen keyboard.
 *
 * Always `false` outside the browser. In the browser it returns `true` when
 * any of these checks passes, in order: touch points on a screen at most
 * 768px wide, an `ontouchstart` property on `window`, or a mobile-looking
 * user agent string.
 */
export function isVirtualKeyboard(): boolean {
  if (!browser) return false;

  // Check for touch capability
  if (navigator.maxTouchPoints > 0 && screen.width <= 768) return true;

  // Check for touch events
  if ("ontouchstart" in window) return true;

  // Fallback to user agent string check
  const userAgent = navigator.userAgent.toLowerCase();
  return /android|webos|iphone|ipad|ipod|blackberry|iemobile|opera mini/i.test(userAgent);
}
================================================
FILE: src/lib/utils/loadAttachmentsFromUrls.ts
================================================
import { base } from "$app/paths";
import { pickSafeMime } from "$lib/utils/mime";

export interface AttachmentLoadResult {
  files: File[];
  errors: string[];
}

/**
 * Parse attachment URLs from query parameters
 * Supports both comma-separated (?attachments=url1,url2) and multiple params (?attachments=url1&attachments=url2)
 */
function parseAttachmentUrls(searchParams: URLSearchParams): string[] {
  const urls: string[] = [];

  // Get all 'attachments' parameters
  const attachmentParams = searchParams.getAll("attachments");
  for (const param of attachmentParams) {
    // Split by comma in case multiple URLs are in one param
    const splitUrls = param.split(",").map((url) => url.trim());
    urls.push(...splitUrls);
  }

  // Filter out empty strings
  return urls.filter((url) => url.length > 0);
}

/**
 * Extract filename from URL or Content-Disposition header
 *
 * Preference order: the RFC 5987 style `filename*=UTF-8''…` parameter, then a
 * plain `filename=` parameter, then the last path segment of `url`, and
 * finally the literal `"attachment"`.
 */
function extractFilename(url: string, contentDisposition?: string | null): string {
  // Try to get filename from Content-Disposition header
  if (contentDisposition) {
    const filenameStar = contentDisposition.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
    if (filenameStar) {
      const cleaned = filenameStar.trim().replace(/['"]/g, "");
      try {
        return decodeURIComponent(cleaned);
      } catch {
        // Malformed percent-encoding: use the raw value rather than failing.
        return cleaned;
      }
    }
    const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/);
    if (match && match[1]) return match[1].replace(/['"]/g, "");
  }

  // Fallback: extract from URL
  try {
    const urlObj = new URL(url);
    const pathname = urlObj.pathname;
    const segments = pathname.split("/");
    const lastSegment = segments[segments.length - 1];
    if (lastSegment && lastSegment.length > 0) {
      return decodeURIComponent(lastSegment);
    }
  } catch {
    // Invalid URL, fall through to default
  }
  return "attachment";
}

/**
 * Load files from remote URLs via server-side proxy
 *
 * All URLs are fetched concurrently through `${base}/api/fetch-url`. The
 * returned `files` and `errors` arrays follow the order in which the URLs
 * appear in `searchParams`, regardless of which request finishes first.
 *
 * @param searchParams - Query parameters that may contain `attachments` entries.
 * @returns The successfully loaded files and one message per failed URL. This
 *   function does not reject for individual fetch failures.
 */
export async function loadAttachmentsFromUrls(
  searchParams: URLSearchParams
): Promise<AttachmentLoadResult> {
  const urls = parseAttachmentUrls(searchParams);
  if (urls.length === 0) {
    return { files: [], errors: [] };
  }

  // Every request resolves to its own outcome instead of pushing into shared
  // arrays, so the final ordering does not depend on network timing.
  const outcomes = await Promise.all(
    urls.map(async (url): Promise<{ file: File } | { error: string }> => {
      try {
        // Fetch via our proxy endpoint to bypass CORS
        const proxyUrl = `${base}/api/fetch-url?${new URLSearchParams({ url })}`;
        const response = await fetch(proxyUrl);
        if (!response.ok) {
          const errorText = await response.text();
          return { error: `Failed to fetch ${url}: ${errorText}` };
        }

        const forwardedType = response.headers.get("x-forwarded-content-type");
        const blob = await response.blob();
        const mimeType = pickSafeMime(forwardedType, blob.type, url);
        const contentDisposition = response.headers.get("content-disposition");
        const filename = extractFilename(url, contentDisposition);

        // Create File object
        return { file: new File([blob], filename, { type: mimeType }) };
      } catch (err) {
        const message = err instanceof Error ? err.message : "Unknown error";
        console.error(`Error loading attachment from ${url}:`, err);
        return { error: `Failed to load ${url}: ${message}` };
      }
    })
  );

  // Promise.all preserves input order, so this split keeps URL order.
  const files: File[] = [];
  const errors: string[] = [];
  for (const outcome of outcomes) {
    if ("file" in outcome) {
      files.push(outcome.file);
    } else {
      errors.push(outcome.error);
    }
  }
  return { files, errors };
}
================================================
FILE: src/lib/utils/marked.spec.ts
================================================
import { describe, expect, test } from "vitest";
import { processTokensSync } from "./marked";

function renderHtml(md: string): string {
  const tokens = processTokensSync(md, []);
  const textToken = tokens.find((token) => token.type === "text");
  if (!textToken || textToken.type !== "text") return "";
  return typeof textToken.html === "string" ? textToken.html : "";
}

describe("marked basic rendering", () => {
  test("renders bold text", () => {
    const html = renderHtml("**bold**");
    expect(html).toContain("bold");
  });
// NOTE(review): the remaining tests lost the HTML inside their string literals when this dump was produced; the text below is reproduced verbatim.
test("renders links", () => { const html = renderHtml("[link](https://example.com)"); expect(html).toContain('"); }); test("renders paragraphs", () => { const html = renderHtml("hello world"); expect(html).toContain("

hello world

"); }); }); describe("marked image renderer", () => { test("renders video extensions as
================================================
FILE: src/routes/models/[...model]/+page.svelte
================================================
{modelId} - {publicConfig.PUBLIC_APP_NAME} createConversation(message)} {loading} currentModel={findCurrentModel(data.models, data.oldModels, modelId)} models={data.models} bind:files bind:draft />
================================================
FILE: src/routes/models/[...model]/+page.ts
================================================
import { base } from "$app/paths";

/**
 * Page load for a model route.
 *
 * Sends a POST to the model's `subscribe` endpoint, waits for it to finish,
 * then returns the parent layout's settings with `activeModel` replaced by the
 * model taken from the route parameters.
 */
export async function load({ params, parent, fetch }) {
  const subscribeUrl = `${base}/api/v2/models/${params.model}/subscribe`;
  await fetch(subscribeUrl, { method: "POST" });

  // Resolved only after the subscribe call, matching the original ordering.
  const parentData = await parent();
  const settings = { ...parentData.settings, activeModel: params.model };
  return { settings };
}
================================================
FILE: src/routes/privacy/+page.svelte
================================================
{@html marked(privacy, { gfm: true })}
================================================
FILE: src/routes/r/[id]/+page.ts
================================================
import { redirect } from "@sveltejs/kit";
import { useAPIClient, handleResponse } from "$lib/APIClient";
import { base } from "$app/paths";
import type { PageLoad } from "./$types";

/**
 * Resolves a share link (`/r/[id]`) by redirecting to a conversation page.
 *
 * - When login is enabled, a user is present and the share id is set, the
 *   share is imported through the API and the user is redirected to the new
 *   conversation, carrying `leafId` and `fromShare` as query parameters.
 * - Otherwise, or when the import fails, the user is redirected to the shared
 *   conversation itself, with `leafId` appended only when it is non-empty.
 *
 * Query strings are built with `URLSearchParams`, so a `leafId` or share id
 * containing `&`, `=`, `#` or spaces cannot alter or inject other parameters.
 *
 * This function always leaves via `redirect(302, …)`.
 */
export const load: PageLoad = async ({ params, url, fetch, parent }) => {
  const leafId = url.searchParams.get("leafId");
  const parentData = await parent();

  // If logged in, import the share and redirect to the new conversation
  if (parentData.loginEnabled && parentData.user && params.id) {
    const client = useAPIClient({ fetch, origin: url.origin });
    let importedConversationId: string | undefined;
    try {
      const result = await client.conversations["import-share"]
        .post({ shareId: params.id })
        .then(handleResponse);
      importedConversationId = result.conversationId;
    } catch {
      // Fall through to view-only mode on error
    }

    // The redirect happens outside the try block so it is never swallowed by the catch.
    if (importedConversationId) {
      // `leafId` is still sent (empty) when absent, as before; only the encoding changed.
      const query = new URLSearchParams({ leafId: leafId ?? "", fromShare: params.id });
      redirect(302, `${base}/conversation/${importedConversationId}?${query}`);
    }
  }

  // Not logged in or import failed: redirect to view-only mode
  const viewQuery = leafId ? `?${new URLSearchParams({ leafId })}` : "";
  redirect(302, `${base}/conversation/${params.id}${viewQuery}`);
};
================================================
FILE: src/routes/settings/(nav)/+layout.svelte
================================================
{#if showContent && browser} {/if}

Settings

{#if !(showContent && browser && !isDesktop(window))}

Models

{#each data.models .filter((el) => !el.unlisted) .filter((el) => { const haystack = normalize(`${el.id} ${el.name ?? ""} ${el.displayName ?? ""}`); return queryTokens.every((q) => haystack.includes(q)); }) as model} {/each}
{/if} {#if showContent}
{@render children?.()}
{/if}
================================================
FILE: src/routes/settings/(nav)/+layout.ts
================================================
export const ssr = false;
================================================
FILE: src/routes/settings/(nav)/+page.svelte
================================================
================================================
FILE: src/routes/settings/(nav)/+server.ts
================================================
import { collections } from "$lib/server/database";
import { z } from "zod";
import { authCondition } from "$lib/server/auth";
import { DEFAULT_SETTINGS, type SettingsEditable } from "$lib/types/Settings";
import { resolveStreamingMode } from "$lib/utils/messageUpdates";

// Shape of the settings payload accepted by POST. Fields with `.default(...)`
// are filled in when the client omits them; `.optional()` fields may be absent.
const settingsSchema = z.object({
  shareConversationsWithModelAuthors: z
    .boolean()
    .default(DEFAULT_SETTINGS.shareConversationsWithModelAuthors),
  welcomeModalSeen: z.boolean().optional(),
  activeModel: z.string().default(DEFAULT_SETTINGS.activeModel),
  customPrompts: z.record(z.string()).default({}),
  multimodalOverrides: z.record(z.boolean()).default({}),
  toolsOverrides: z.record(z.boolean()).default({}),
  providerOverrides: z.record(z.string()).default({}),
  streamingMode: z.enum(["raw", "smooth"]).optional(),
  directPaste: z.boolean().default(false),
  hapticsEnabled: z.boolean().default(true),
  hidePromptExamples: z.record(z.boolean()).default({}),
  billingOrganization: z.string().optional(),
});

/**
 * Saves the caller's settings.
 *
 * The JSON body is validated against `settingsSchema`. A body that is not
 * valid JSON, or that fails validation, is answered with HTTP 400 instead of
 * letting the exception surface as a server error. On success the settings
 * document matched by `authCondition(locals)` is upserted and an empty 200
 * response is returned.
 *
 * `welcomeModalSeen` is not stored as-is: when truthy it sets
 * `welcomeModalSeenAt` to the time of the request. The stored `streamingMode`
 * is whatever `resolveStreamingMode` returns for the parsed settings (helper
 * not visible here).
 */
export async function POST({ request, locals }) {
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return new Response("Invalid JSON body", { status: 400 });
  }

  const parsed = settingsSchema.safeParse(body);
  if (!parsed.success) {
    return new Response(
      JSON.stringify({ message: "Invalid settings", issues: parsed.error.issues }),
      { status: 400, headers: { "Content-Type": "application/json" } }
    );
  }

  const { welcomeModalSeen, ...parsedSettings } = parsed.data;
  const streamingMode = resolveStreamingMode(parsedSettings);
  const settings = {
    ...parsedSettings,
    streamingMode,
  } satisfies SettingsEditable;

  // One timestamp so every date field written by this request agrees.
  const now = new Date();
  await collections.settings.updateOne(
    authCondition(locals),
    {
      $set: {
        ...settings,
        ...(welcomeModalSeen && { welcomeModalSeenAt: now }),
        updatedAt: now,
      },
      $setOnInsert: {
        createdAt: now,
      },
    },
    {
      upsert: true,
    }
  );

  // return ok response
  return new Response();
}
================================================
FILE: src/routes/settings/(nav)/[...model]/+page.svelte
================================================

{model.displayName}

{#if model.description}

{model.description}

{/if}
{#if model.modelUrl} Model page {/if} {#if model.datasetName || model.datasetUrl} Dataset page {/if} {#if model.websiteUrl} Model website {/if} {#if publicConfig.isHuggingChat} {#if !model?.isRouter} Use via API View model card {/if}
Copy direct link
{/if}
{#if model?.isRouter}

Omni routes your messages to the best underlying model depending on your request.

{/if}

System Prompt

{#if hasCustomPreprompt} {/if}
Tool calling (functions)

Enable tools and allow the model to call them in chat.

Multimodal support (image inputs)

Enable image uploads and send images to this model.

{#if model?.isRouter}
Hide prompt examples

Hide the prompt suggestions above the chat input.

{/if}
{#if publicConfig.isHuggingChat && model.providers?.length && !model?.isRouter}
Inference Providers

Choose which Inference Provider to use with this model. You can also manage provider preferences in your HF settings.

v && setProviderOverride(v)} > {@const currentValue = getProviderOverride()} {@const currentPolicy = PROVIDER_POLICIES.find((p) => p.value === currentValue)} {@const currentProvider = providerList.find((p) => p.provider === currentValue)} {#if currentValue === "auto"} {:else if currentValue === "fastest"} {:else if currentValue === "cheapest"} {:else if currentProvider} {@const hubOrg = PROVIDERS_HUB_ORGS[currentValue as keyof typeof PROVIDERS_HUB_ORGS]} {#if hubOrg} {/if} {/if} {currentPolicy?.label ?? currentProvider?.provider ?? currentValue} Selection mode {#each PROVIDER_POLICIES as opt (opt.value)} {#if opt.value === "auto"} {:else if opt.value === "fastest"} {:else if opt.value === "cheapest"} {/if} {opt.label} {#if getProviderOverride() === opt.value} {/if} {/each}
Specific provider {#each providerList as prov (prov.provider)} {@const hubOrg = PROVIDERS_HUB_ORGS[prov.provider as keyof typeof PROVIDERS_HUB_ORGS]} {#if hubOrg} {:else} {/if} {prov.provider} {#if getProviderOverride() === prov.provider} {/if} {/each}
{/if}
================================================
FILE: src/routes/settings/(nav)/[...model]/+page.ts
================================================
import { base } from "$app/paths";
import { redirect } from "@sveltejs/kit";

/**
 * Page load for a model's settings page.
 *
 * Looks up the model whose `id` equals the route's `model` parameter in the
 * parent layout data. When no such model exists, or it is marked `unlisted`,
 * the request is redirected (302) to the settings root. Otherwise the parent
 * data is returned unchanged.
 */
export async function load({ parent, params }) {
  const parentData = await parent();
  const requested = parentData.models.find(
    (candidate: { id: string }) => candidate.id === params.model
  );

  // Only a model that exists and is listed may be shown.
  const isViewable = requested && !requested.unlisted;
  if (!isViewable) {
    redirect(302, `${base}/settings`);
  }

  return parentData;
}
================================================
FILE: src/routes/settings/(nav)/application/+page.svelte
================================================

Application Settings

{#if OPENAI_BASE_URL !== null}
API Base URL: {OPENAI_BASE_URL}
{/if} {#if !!publicConfig.PUBLIC_COMMIT_SHA} {/if} {#if page.data.isAdmin}

Admin mode

{#if refreshMessage} {refreshMessage} {/if}
{/if}
{#if publicConfig.PUBLIC_APP_DATA_SHARING === "1"}
Share with model authors

Sharing your data helps improve open models over time.

{/if}
Streaming mode

Choose how assistant text appears while generating.

Paste text directly

Paste long text directly into chat instead of a file.

{#if supportsHaptics()}
Haptic feedback

Vibrate on taps and actions on supported devices.

{/if}
Theme

Choose light, dark, or follow system.

{#if publicConfig.isHuggingChat && page.data.user}
Billing

Select between personal or organization billing (for eligible organizations).

{#if billingOrgsLoading} Loading... {:else if billingOrgsError} {billingOrgsError} {:else} {/if}
Providers Usage

See which providers you use and choose your preferred ones.

View Usage
{/if}
{#if publicConfig.isHuggingChat} Github repository Share your feedback on HuggingChat About & Privacy {/if}
================================================ FILE: src/routes/settings/+layout.svelte ================================================ goto(previousPage)} disableFly={true} width="border dark:border-gray-700 h-[95dvh] w-[90dvw] pb-0 overflow-hidden rounded-2xl bg-white shadow-2xl outline-none dark:bg-gray-800 dark:text-gray-200 sm:h-[95dvh] xl:w-[1200px] xl:h-[85dvh] 2xl:h-[75dvh]" > {@render children?.()} {#if $settings.recentlySaved}
Saved
{/if}
================================================ FILE: src/styles/highlight-js.css ================================================ /* Atom One Light (v9.16.2) */ /* Atom One Light by Daniel Gamage Original One Light Syntax theme from https://github.com/atom/one-light-syntax base: #fafafa mono-1: #383a42 mono-2: #686b77 mono-3: #a0a1a7 hue-1: #0184bb hue-2: #4078f2 hue-3: #a626a4 hue-4: #50a14f hue-5: #e45649 hue-5-2: #c91243 hue-6: #986801 hue-6-2: #c18401 */ .hljs { display: block; overflow-x: auto; padding: 0.5em; color: #383a42; background: #fafafa; } .hljs-comment, .hljs-quote { color: #a0a1a7; font-style: italic; } .hljs-doctag, .hljs-keyword, .hljs-formula { color: #a626a4; } .hljs-section, .hljs-name, .hljs-selector-tag, .hljs-deletion, .hljs-subst { color: #e45649; } .hljs-literal { color: #0184bb; } .hljs-string, .hljs-regexp, .hljs-addition, .hljs-attribute, .hljs-meta-string { color: #50a14f; } .hljs-built_in, .hljs-class .hljs-title { color: #c18401; } .hljs-attr, .hljs-variable, .hljs-template-variable, .hljs-type, .hljs-selector-class, .hljs-selector-attr, .hljs-selector-pseudo, .hljs-number { color: #986801; } .hljs-symbol, .hljs-bullet, .hljs-link, .hljs-meta, .hljs-selector-id, .hljs-title { color: #4078f2; } .hljs-emphasis { font-style: italic; } .hljs-strong { font-weight: bold; } .hljs-link { text-decoration: underline; } /* Atom One Dark (v9.16.2) scoped to .dark */ /* Atom One Dark by Daniel Gamage Original One Dark Syntax theme from https://github.com/atom/one-dark-syntax base: #282c34 mono-1: #abb2bf mono-2: #818896 mono-3: #5c6370 hue-1: #56b6c2 hue-2: #61aeee hue-3: #c678dd hue-4: #98c379 hue-5: #e06c75 hue-5-2: #be5046 hue-6: #d19a66 hue-6-2: #e6c07b */ .dark .hljs { display: block; overflow-x: auto; padding: 0.5em; color: #abb2bf; background: #282c34; } .dark .hljs-comment, .dark .hljs-quote { color: #5c6370; font-style: italic; } .dark .hljs-doctag, .dark .hljs-keyword, .dark .hljs-formula { color: #c678dd; } .dark .hljs-section, .dark 
.hljs-name, .dark .hljs-selector-tag, .dark .hljs-deletion, .dark .hljs-subst { color: #e06c75; } .dark .hljs-literal { color: #56b6c2; } .dark .hljs-string, .dark .hljs-regexp, .dark .hljs-addition, .dark .hljs-attribute, .dark .hljs-meta-string { color: #98c379; } .dark .hljs-built_in, .dark .hljs-class .hljs-title { color: #e6c07b; } .dark .hljs-attr, .dark .hljs-variable, .dark .hljs-template-variable, .dark .hljs-type, .dark .hljs-selector-class, .dark .hljs-selector-attr, .dark .hljs-selector-pseudo, .dark .hljs-number { color: #d19a66; } .dark .hljs-symbol, .dark .hljs-bullet, .dark .hljs-link, .dark .hljs-meta, .dark .hljs-selector-id, .dark .hljs-title { color: #61aeee; } .dark .hljs-emphasis { font-style: italic; } .dark .hljs-strong { font-weight: bold; } .dark .hljs-link { text-decoration: underline; } ================================================ FILE: src/styles/main.css ================================================ @import "./highlight-js.css"; @tailwind base; @tailwind components; @tailwind utilities; html, body { overscroll-behavior: none; touch-action: pan-x pan-y; } @layer components { .btn { @apply inline-flex flex-shrink-0 cursor-pointer select-none items-center justify-center whitespace-nowrap outline-none transition-all focus:ring disabled:cursor-default; } .active-model { /* Ensure active border wins over defaults/utilities in both themes */ @apply !border-black dark:!border-white/60; } .file-hoverable { @apply hover:bg-gray-500/10; } .base-tool { @apply flex h-[1.6rem] items-center gap-[.2rem] whitespace-nowrap border border-transparent text-xs outline-none transition-all focus:outline-none active:outline-none dark:hover:text-gray-300 sm:hover:text-purple-600; } .active-tool { @apply rounded-full !border-purple-200 bg-purple-100 pl-1 pr-2 text-purple-600 hover:text-purple-600 dark:!border-purple-700 dark:bg-purple-600/40 dark:text-purple-200; } } @layer utilities { /* your existing utilities */ .scrollbar-custom { @apply 
scrollbar-thin scrollbar-track-transparent scrollbar-thumb-black/10 scrollbar-thumb-rounded-full scrollbar-w-1 hover:scrollbar-thumb-black/20 dark:scrollbar-thumb-white/10 dark:hover:scrollbar-thumb-white/20; } .scrollbar-custom::-webkit-scrollbar { background-color: transparent; width: 8px; height: 8px; } .scrollbar-custom::-webkit-scrollbar-thumb { background-color: rgba(0, 0, 0, 0.1); border-radius: 9999px; } .dark .scrollbar-custom::-webkit-scrollbar { background-color: rgba(17, 17, 17, 0.85); } .dark .scrollbar-custom::-webkit-scrollbar-thumb { background-color: rgba(255, 255, 255, 0.1); } /* Rounded top/bottom caps for vertical scrollbars (Chrome/Edge/Safari) */ .scrollbar-custom::-webkit-scrollbar-track { @apply rounded-full bg-clip-padding; /* clip bg to padding so caps look round */ /* space for the end caps — tweak with Tailwind spacing */ border-top: theme("spacing.2") solid transparent; /* 0.5rem */ border-bottom: theme("spacing.2") solid transparent; /* 0.5rem */ } /* Rounded left/right caps for horizontal scrollbars */ .scrollbar-custom::-webkit-scrollbar-track:horizontal { @apply rounded-full bg-clip-padding; border-left: theme("spacing.2") solid transparent; border-right: theme("spacing.2") solid transparent; border-top-width: 0; border-bottom-width: 0; } .no-scrollbar { @apply [-ms-overflow-style:none] [scrollbar-width:none] [&::-ms-scrollbar]:hidden [&::-webkit-scrollbar]:hidden; } .prose table { @apply block max-w-full overflow-x-auto scrollbar-thin scrollbar-track-transparent scrollbar-thumb-black/10 scrollbar-thumb-rounded-full scrollbar-w-1 hover:scrollbar-thumb-black/20 dark:scrollbar-thumb-white/10 dark:hover:scrollbar-thumb-white/20; } /* .scrollbar-custom { @apply scrollbar-thin scrollbar-track-transparent scrollbar-thumb-black/10 scrollbar-thumb-rounded-full scrollbar-w-1 hover:scrollbar-thumb-black/20 dark:scrollbar-thumb-white/10 dark:hover:scrollbar-thumb-white/20; } */ .prose hr { @apply my-4; } .prose strong { @apply font-medium; } 
.prose pre { @apply border-[0.5px] bg-white text-gray-600 dark:border-gray-700 dark:!bg-gray-900 dark:bg-inherit dark:text-inherit; } .prose code:not(pre code) { @apply rounded-md bg-gray-200/60 px-[0.4em] py-[0.2em] text-[85%] dark:bg-gray-700; } .prose code:not(pre code)::before, .prose code:not(pre code)::after { content: none; } /* Override prose-sm title sizes - 75% of original */ .prose-sm :where(h1):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 1.6em; /* 75% */ @apply font-semibold; } .prose-sm :where(h2):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 1.07em; /* 75% */ @apply font-semibold; } .prose-sm :where(h3):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 0.96em; /* 75% */ @apply font-semibold; } .prose-sm :where(h4):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 0.8em; /* 75% */ @apply font-semibold; } .prose-sm :where(h5):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 0.75em; /* 75% */ @apply font-semibold; } .prose-sm :where(h6):not(:where([class~="not-prose"], [class~="not-prose"] *)) { font-size: 0.7em; /* 75% */ @apply font-semibold; } } .katex-display { overflow: auto hidden; } ================================================ FILE: static/chatui/manifest.json ================================================ { "background_color": "#ffffff", "name": "ChatUI", "short_name": "ChatUI", "display": "standalone", "start_url": "/chat", "icons": [ { "src": "/chat/chatui/icon-36x36.png", "sizes": "36x36", "type": "image/png" }, { "src": "/chat/chatui/icon-48x48.png", "sizes": "48x48", "type": "image/png" }, { "src": "/chat/chatui/icon-72x72.png", "sizes": "72x72", "type": "image/png" }, { "src": "/chat/chatui/icon-96x96.png", "sizes": "96x96", "type": "image/png" }, { "src": "/chat/chatui/icon-128x128.png", "sizes": "128x128", "type": "image/png" }, { "src": "/chat/chatui/icon-144x144.png", "sizes": "144x144", "type": "image/png" }, { 
"src": "/chat/chatui/icon-192x192.png", "sizes": "192x192", "type": "image/png" }, { "src": "/chat/chatui/icon-256x256.png", "sizes": "256x256", "type": "image/png" }, { "src": "/chat/chatui/icon-512x512.png", "sizes": "512x512", "type": "image/png" } ] } ================================================ FILE: static/huggingchat/manifest.json ================================================ { "background_color": "#ffffff", "name": "HuggingChat", "short_name": "HuggingChat", "display": "standalone", "start_url": "/chat", "icons": [ { "src": "/chat/huggingchat/icon-36x36.png", "sizes": "36x36", "type": "image/png" }, { "src": "/chat/huggingchat/icon-48x48.png", "sizes": "48x48", "type": "image/png" }, { "src": "/chat/huggingchat/icon-72x72.png", "sizes": "72x72", "type": "image/png" }, { "src": "/chat/huggingchat/icon-96x96.png", "sizes": "96x96", "type": "image/png" }, { "src": "/chat/huggingchat/icon-128x128.png", "sizes": "128x128", "type": "image/png" }, { "src": "/chat/huggingchat/icon-144x144.png", "sizes": "144x144", "type": "image/png" }, { "src": "/chat/huggingchat/icon-192x192.png", "sizes": "192x192", "type": "image/png" }, { "src": "/chat/huggingchat/icon-256x256.png", "sizes": "256x256", "type": "image/png" }, { "src": "/chat/huggingchat/icon-512x512.png", "sizes": "512x512", "type": "image/png" } ] } ================================================ FILE: static/huggingchat/routes.chat.json ================================================ [ { "name": "job_app_docs", "description": "Create ATS‑ready resumes and cover letters aligned to a job posting.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": [ "deepseek-ai/DeepSeek-V3.1", "moonshotai/Kimi-K2-Instruct-0905", "zai-org/GLM-4.6" ] }, { "name": "email_writing", "description": "Draft or revise emails with clear tone and a specific CTA.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "google/gemma-3-27b-it"] }, { "name": 
"social_media_copy", "description": "Write platform‑specific social captions and short posts for engagement.", "primary_model": "deepseek-ai/DeepSeek-V3.1", "fallback_models": ["moonshotai/Kimi-K2-Instruct-0905", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "editing_rewrite", "description": "Lightly proofread and rephrase text for tone, length, and clarity.", "primary_model": "moonshotai/Kimi-K2-Instruct-0905", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "google/gemma-3-27b-it", "zai-org/GLM-4.6"] }, { "name": "qa_explanations", "description": "Provide concise answers and plain‑language explanations.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-3.3-70B-Instruct"] }, { "name": "technical_explanation", "description": "Explain complex technical topics step‑by‑step with worked examples.", "primary_model": "deepseek-ai/DeepSeek-R1-0528", "fallback_models": ["Qwen/QwQ-32B", "moonshotai/Kimi-K2-Instruct-0905"] }, { "name": "essay_writing", "description": "Plan and write essays from outline to draft; citations on request.", "primary_model": "Qwen/Qwen3-235B-A22B-Thinking-2507", "fallback_models": ["deepseek-ai/DeepSeek-R1-0528", "deepseek-ai/DeepSeek-V3.1"] }, { "name": "summarization", "description": "Condense documents into an abstract, key points, and action items.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": [ "deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-4-Maverick-17B-128E-Instruct" ] }, { "name": "translation", "description": "Translate between languages with register and terminology control.", "primary_model": "CohereLabs/command-a-translate-08-2025", "fallback_models": ["CohereLabs/aya-expanse-32b", "google/gemma-3-27b-it"] }, { "name": "language_tutoring", "description": "Interactive language practice with conversation, grammar, vocab, and feedback.", "primary_model": "CohereLabs/aya-expanse-32b", "fallback_models": [ 
"CohereLabs/aya-expanse-8b", "google/gemma-3-27b-it", "meta-llama/Llama-3.3-70B-Instruct" ] }, { "name": "formal_proof", "description": "Produce Lean 4 proofs with tactic scripts and subgoals.", "primary_model": "deepseek-ai/DeepSeek-Prover-V2-671B", "fallback_models": ["deepseek-ai/DeepSeek-R1-0528", "Qwen/QwQ-32B"] }, { "name": "software_architecture_design", "description": "Design architectures: views, APIs, data models, and scalability trade‑offs.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-3.1-405B-Instruct"] }, { "name": "agentic_orchestration", "description": "Plan and execute tool/API calls with schemas, retries, and recovery.", "primary_model": "openai/gpt-oss-120b", "fallback_models": ["zai-org/GLM-4.6", "deepseek-ai/DeepSeek-V3.1"] }, { "name": "code_generation", "description": "Generate new code, tests, and scaffolds from specs.", "primary_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "Qwen/Qwen3-Coder-30B-A3B-Instruct"] }, { "name": "frontend_ui", "description": "Build accessible, responsive UI components and pages.", "primary_model": "deepseek-ai/DeepSeek-R1-0528", "fallback_models": ["Qwen/Qwen3-Coder-480B-A35B-Instruct", "zai-org/GLM-4.6"] }, { "name": "code_maintenance", "description": "Fix bugs and refactor code; add tests.", "primary_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "fallback_models": [ "deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-4-Maverick-17B-128E-Instruct" ] }, { "name": "code_review_docs", "description": "Explain code and write docs, READMEs, and examples.", "primary_model": "deepseek-ai/DeepSeek-V3.1", "fallback_models": ["meta-llama/Llama-3.3-70B-Instruct", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "terminal_cli", "description": "Solve Linux shell tasks with safe, idempotent commands.", "primary_model": "zai-org/GLM-4.6", "fallback_models": 
["meta-llama/Llama-4-Maverick-17B-128E-Instruct", "Qwen/Qwen3-32B"] }, { "name": "travel_planning", "description": "Research trips and craft day‑by‑day itineraries with logistics.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": [ "deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-4-Maverick-17B-128E-Instruct" ] }, { "name": "shopping_recommendations", "description": "Compare products and recommend ranked picks with rationale.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["zai-org/GLM-4.6", "deepseek-ai/DeepSeek-V3.1"] }, { "name": "meal_planning", "description": "Create meal plans and recipes by diet, budget, and time.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "google/gemma-3-27b-it"] }, { "name": "decision_support", "description": "Score options against criteria and recommend a choice.", "primary_model": "deepseek-ai/DeepSeek-R1-0528", "fallback_models": ["Qwen/Qwen3-235B-A22B-Thinking-2507", "deepseek-ai/DeepSeek-V3.1"] }, { "name": "career_coaching", "description": "Guide job search, skill gaps, interviews, and negotiation.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["meta-llama/Llama-3.3-70B-Instruct", "deepseek-ai/DeepSeek-V3.1"] }, { "name": "personal_finance", "description": "Build budgets, savings plans, and simple tracking schemas.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "Qwen/Qwen3-235B-A22B-Thinking-2507"] }, { "name": "health_wellness_info", "description": "Provide general health, fitness, sleep, and nutrition information.", "primary_model": "aaditya/Llama3-OpenBioLLM-70B", "fallback_models": ["Qwen/Qwen3-235B-A22B-Instruct-2507", "google/gemma-3-27b-it"] }, { "name": "brainstorming_ideas", "description": "Generate many creative ideas, then help narrow choices.", "primary_model": "deepseek-ai/DeepSeek-V3.1", "fallback_models": 
["NousResearch/Hermes-4-70B", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "creative_writing", "description": "Write fiction, poems, jokes, or scripts with style control.", "primary_model": "moonshotai/Kimi-K2-Instruct-0905", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "meta-llama/Llama-3.3-70B-Instruct"] }, { "name": "interactive_roleplay", "description": "Run in‑character text adventures and persistent role‑play.", "primary_model": "NousResearch/Hermes-4-70B", "fallback_models": ["moonshotai/Kimi-K2-Instruct-0905", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "character_impersonation", "description": "Act and imitate fictional character voices or invented personas consistently.", "primary_model": "NousResearch/Hermes-4-70B", "fallback_models": ["moonshotai/Kimi-K2-Instruct-0905", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "casual_conversation", "description": "Engage in friendly and open‑ended casual chat.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": ["moonshotai/Kimi-K2-Instruct-0905", "google/gemma-3-27b-it"] }, { "name": "emotional_support", "description": "Provide compassionate listening and gentle guidance for emotional well-being.", "primary_model": "Qwen/Qwen3-235B-A22B-Instruct-2507", "fallback_models": [ "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "deepseek-ai/DeepSeek-V3.1" ] }, { "name": "learning_tutor", "description": "Teach concepts with step-by-step explanations, examples, and practice.", "primary_model": "deepseek-ai/DeepSeek-V3.1", "fallback_models": ["Qwen/Qwen3-235B-A22B-Thinking-2507", "deepseek-ai/DeepSeek-R1-0528"] }, { "name": "structured_data", "description": "Extract structured JSON from text.", "primary_model": "zai-org/GLM-4.6", "fallback_models": ["deepseek-ai/DeepSeek-V3.1", "Qwen/Qwen3-235B-A22B-Instruct-2507"] }, { "name": "spell_checker", "description": "Fix spelling, capitalization, punctuation, and obvious grammar errors.", "primary_model": "CohereLabs/aya-expanse-32b", 
"fallback_models": ["moonshotai/Kimi-K2-Instruct-0905", "google/gemma-3-27b-it"] } ] ================================================ FILE: static/robots.txt ================================================ User-agent: * Allow: / Allow: /r/ Disallow: /conversation/ Disallow: /api/ Disallow: /login Disallow: /logout # Sitemap # Sitemap: https://huggingface.co/chat/sitemap.xml ================================================ FILE: stub/@reflink/reflink/index.js ================================================ ================================================ FILE: stub/@reflink/reflink/package.json ================================================ { "name": "@reflink/reflink", "version": "0.0.0", "main": "index.js" } ================================================ FILE: svelte.config.js ================================================ import adapterNode from "@sveltejs/adapter-node"; import adapterStatic from "@sveltejs/adapter-static"; import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; import dotenv from "dotenv"; import { execSync } from "child_process"; dotenv.config({ path: "./.env.local", override: true }); dotenv.config({ path: "./.env" }); const useStatic = process.env.ADAPTER === "static"; function getCurrentCommitSHA() { try { return execSync("git rev-parse HEAD").toString(); } catch (error) { console.error("Error getting current commit SHA:", error); return "unknown"; } } process.env.PUBLIC_VERSION ??= process.env.npm_package_version; process.env.PUBLIC_COMMIT_SHA ??= getCurrentCommitSHA(); process.env.PUBLIC_APP_ASSETS ??= "chatui"; /** @type {import('@sveltejs/kit').Config} */ const config = { // Consult https://kit.svelte.dev/docs/integrations#preprocessors // for more information about preprocessors preprocess: vitePreprocess(), kit: { adapter: useStatic ? 
adapterStatic({ fallback: "index.html", strict: false }) : adapterNode(), paths: { base: process.env.APP_BASE || "", relative: false, }, csrf: { // handled in hooks.server.ts, because we can have multiple valid origins trustedOrigins: ["*"], }, csp: { directives: { ...(process.env.ALLOW_IFRAME === "true" ? {} : { "frame-ancestors": ["https://huggingface.co"] }), }, }, alias: {}, }, }; export default config; ================================================ FILE: tailwind.config.cjs ================================================ const defaultTheme = require("tailwindcss/defaultTheme"); const colors = require("tailwindcss/colors"); /** @type {import('tailwindcss').Config} */ module.exports = { darkMode: "class", mode: "jit", content: ["./src/**/*.{html,js,svelte,ts}"], theme: { extend: { colors: { gray: { 600: "#323843", 700: "#252a33", 800: "#1b1f27", 900: "#12151c", 950: "#07090d", }, }, fontSize: { xxs: "0.625rem", smd: "0.94rem", }, }, }, plugins: [ require("tailwind-scrollbar")({ nocompatible: true }), require("@tailwindcss/typography"), ], }; ================================================ FILE: tsconfig.json ================================================ { "extends": "./.svelte-kit/tsconfig.json", "compilerOptions": { "allowJs": true, "checkJs": true, "esModuleInterop": true, "forceConsistentCasingInFileNames": true, "resolveJsonModule": true, "skipLibCheck": true, "sourceMap": true, "strict": true, "target": "ES2018" }, "exclude": ["vite.config.ts"] // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias // // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes // from the referenced tsconfig.json - TypeScript does not merge them in } ================================================ FILE: vite.config.ts ================================================ import { sveltekit } from "@sveltejs/kit/vite"; import Icons from "unplugin-icons/vite"; import { promises } from "fs"; import { 
defineConfig } from "vitest/config";
import { config } from "dotenv";

config({ path: "./.env.local" });

/**
 * Vite plugin factory used to load fonts server side for thumbnail generation.
 *
 * The returned plugin's `transform` hook replaces every module whose id ends
 * with ".ttf" by a JS module that default-exports the file's bytes as an
 * ArrayBuffer. Modules with any other id are left untouched (the hook returns
 * nothing for them).
 *
 * @returns A plugin object with a `name` and an async `transform(_src, id)` hook.
 */
function loadTTFAsArrayBuffer() {
  return {
    name: "load-ttf-as-array-buffer",
    async transform(_src, id) {
      if (id.endsWith(".ttf")) {
        // Interpolating a Uint8Array stringifies it as a comma-separated byte list,
        // which is exactly the array literal body the generated module needs.
        return `export default new Uint8Array([ ${new Uint8Array(await promises.readFile(id))} ]).buffer`;
      }
    },
  };
}

export default defineConfig({
  plugins: [
    sveltekit(),
    Icons({
      compiler: "svelte",
    }),
    loadTTFAsArrayBuffer(),
  ],
  // Allow external access via ngrok tunnel host
  server: {
    // Always parse PORT as a base-10 integer; fall back to Vite's default port.
    port: process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 5173,
    // Only this exact tunnel host is allowed.
    // See Vite server.allowedHosts: string[] | true
    // To allow every subdomain of a tunnel domain instead, add an entry with a
    // leading dot (e.g. ".ngrok-free.app"), which matches subdomains per Vite's
    // host check logic.
    allowedHosts: ["huggingface.ngrok.io"],
  },
  optimizeDeps: {
    include: ["uuid", "sharp", "clsx"],
  },
  test: {
    workspace: [
      ...(process.env.VITEST_BROWSER === "true"
        ? [
            {
              // Client-side tests (Svelte components), opt-in due to flaky browser harness in CI/local
              extends: "./vite.config.ts",
              test: {
                name: "client",
                environment: "browser",
                browser: {
                  enabled: true,
                  provider: "playwright",
                  instances: [{ browser: "chromium", headless: true }],
                },
                include: ["src/**/*.svelte.{test,spec}.{js,ts}"],
                exclude: ["src/lib/server/**", "src/**/*.ssr.{test,spec}.{js,ts}"],
                setupFiles: ["./scripts/setups/vitest-setup-client.ts"],
              },
            },
          ]
        : []),
      {
        // SSR tests (Server-side rendering)
        extends: "./vite.config.ts",
        test: {
          name: "ssr",
          environment: "node",
          include: ["src/**/*.ssr.{test,spec}.{js,ts}"],
        },
      },
      {
        // Server-side tests (Node.js utilities)
        extends: "./vite.config.ts",
        test: {
          name: "server",
          environment: "node",
          include: ["src/**/*.{test,spec}.{js,ts}"],
          exclude: ["src/**/*.svelte.{test,spec}.{js,ts}", "src/**/*.ssr.{test,spec}.{js,ts}"],
          setupFiles: ["./scripts/setups/vitest-setup-server.ts"],
          testTimeout: 30000,
          hookTimeout: 30000,
        },
      },
    ],
  },
});