Repository: containers/podman-desktop-extension-ai-lab Branch: main Commit: 28796a6eff7c Files: 457 Total size: 2.2 MB Directory structure: gitextract_r3viv2ck/ ├── .dockerignore ├── .editorconfig ├── .fmf/ │ └── version ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ ├── epic.yml │ │ ├── feature_request.yml │ │ └── ux-request.yaml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── dependabot.yml │ └── workflows/ │ ├── ai-lab-e2e-nightly-windows.yaml │ ├── build-next.yaml │ ├── compute-model-sizes.yml │ ├── e2e-main-tf.yaml │ ├── e2e-main.yaml │ ├── llama-stack-playground.yaml │ ├── pr-check.yaml │ ├── ramalama.yaml │ ├── recipe-catalog-change-cleanup.yaml │ ├── recipe-catalog-change-template.yaml │ ├── recipe-catalog-change-trigger.yaml │ ├── release.yaml │ ├── update-ramalama-references.sh │ └── update-ramalama-references.yaml ├── .gitignore ├── .husky/ │ ├── commit-msg │ └── pre-commit ├── .npmrc ├── .prettierrc ├── .vscode/ │ └── settings.json ├── CODE-OF-CONDUCT.md ├── Containerfile ├── LICENSE ├── MIGRATION.md ├── PACKAGING-GUIDE.md ├── README.md ├── RELEASE.md ├── SECURITY.md ├── USAGE_DATA.md ├── api/ │ └── openapi.yaml ├── clean.sh ├── commitlint.config.js ├── docs/ │ └── proposals/ │ ├── ai-studio.md │ └── state-management.md ├── eslint.config.mjs ├── package.json ├── packages/ │ ├── backend/ │ │ ├── .gitignore │ │ ├── __mocks__/ │ │ │ └── @podman-desktop/ │ │ │ └── api.js │ │ ├── package.json │ │ ├── src/ │ │ │ ├── assets/ │ │ │ │ ├── ai.json │ │ │ │ ├── inference-images.json │ │ │ │ ├── instructlab-images.json │ │ │ │ ├── llama-stack-images.json │ │ │ │ ├── llama-stack-playground-images.json │ │ │ │ └── openai.json │ │ │ ├── extension.spec.ts │ │ │ ├── extension.ts │ │ │ ├── instructlab-api-impl.ts │ │ │ ├── llama-stack-api-impl.ts │ │ │ ├── managers/ │ │ │ │ ├── GPUManager.spec.ts │ │ │ │ ├── GPUManager.ts │ │ │ │ ├── SnippetManager.spec.ts │ │ │ │ ├── SnippetManager.ts │ │ │ │ ├── TaskRunner.spec.ts │ │ │ │ ├── 
TaskRunner.ts │ │ │ │ ├── apiServer.spec.ts │ │ │ │ ├── apiServer.ts │ │ │ │ ├── application/ │ │ │ │ │ ├── applicationManager.spec.ts │ │ │ │ │ └── applicationManager.ts │ │ │ │ ├── catalogManager.spec.ts │ │ │ │ ├── catalogManager.ts │ │ │ │ ├── gitManager.spec.ts │ │ │ │ ├── gitManager.ts │ │ │ │ ├── inference/ │ │ │ │ │ ├── inferenceManager.spec.ts │ │ │ │ │ └── inferenceManager.ts │ │ │ │ ├── instructlab/ │ │ │ │ │ ├── instructlabManager.spec.ts │ │ │ │ │ └── instructlabManager.ts │ │ │ │ ├── llama-stack/ │ │ │ │ │ ├── llamaStackManager.spec.ts │ │ │ │ │ └── llamaStackManager.ts │ │ │ │ ├── modelsManager.spec.ts │ │ │ │ ├── modelsManager.ts │ │ │ │ ├── monitoringManager.spec.ts │ │ │ │ ├── monitoringManager.ts │ │ │ │ ├── playground/ │ │ │ │ │ ├── McpServerManager.spec.ts │ │ │ │ │ ├── McpServerManager.ts │ │ │ │ │ ├── aiSdk.spec.ts │ │ │ │ │ └── aiSdk.ts │ │ │ │ ├── playgroundV2Manager.spec.ts │ │ │ │ ├── playgroundV2Manager.ts │ │ │ │ ├── podmanConnection.spec.ts │ │ │ │ ├── podmanConnection.ts │ │ │ │ ├── recipes/ │ │ │ │ │ ├── BuilderManager.spec.ts │ │ │ │ │ ├── BuilderManager.ts │ │ │ │ │ ├── PodManager.spec.ts │ │ │ │ │ ├── PodManager.ts │ │ │ │ │ ├── RecipeManager.spec.ts │ │ │ │ │ └── RecipeManager.ts │ │ │ │ └── snippets/ │ │ │ │ ├── java-okhttp-snippet.spec.ts │ │ │ │ ├── java-okhttp-snippet.ts │ │ │ │ ├── python-langchain-snippet.spec.ts │ │ │ │ ├── python-langchain-snippet.ts │ │ │ │ ├── quarkus-snippet.spec.ts │ │ │ │ └── quarkus-snippet.ts │ │ │ ├── models/ │ │ │ │ ├── AIConfig.spec.ts │ │ │ │ ├── AIConfig.ts │ │ │ │ ├── ApplicationOptions.ts │ │ │ │ ├── HuggingFaceModelHandler.spec.ts │ │ │ │ ├── HuggingFaceModelHandler.ts │ │ │ │ ├── ModelHandler.ts │ │ │ │ ├── TaskRunner.ts │ │ │ │ ├── URLModelHandler.ts │ │ │ │ └── baseEvent.ts │ │ │ ├── registries/ │ │ │ │ ├── ApplicationRegistry.ts │ │ │ │ ├── CancellationTokenRegistry.spec.ts │ │ │ │ ├── CancellationTokenRegistry.ts │ │ │ │ ├── ConfigurationRegistry.spec.ts │ │ │ │ ├── 
ConfigurationRegistry.ts │ │ │ │ ├── ContainerRegistry.spec.ts │ │ │ │ ├── ContainerRegistry.ts │ │ │ │ ├── ConversationRegistry.ts │ │ │ │ ├── InferenceProviderRegistry.ts │ │ │ │ ├── LocalRepositoryRegistry.spec.ts │ │ │ │ ├── LocalRepositoryRegistry.ts │ │ │ │ ├── ModelHandlerRegistry.ts │ │ │ │ ├── NavigationRegistry.spec.ts │ │ │ │ ├── NavigationRegistry.ts │ │ │ │ ├── TaskRegistry.spec.ts │ │ │ │ └── TaskRegistry.ts │ │ │ ├── studio-api-impl.spec.ts │ │ │ ├── studio-api-impl.ts │ │ │ ├── studio.spec.ts │ │ │ ├── studio.ts │ │ │ ├── templates/ │ │ │ │ ├── java-okhttp.mustache │ │ │ │ ├── python-langchain.mustache │ │ │ │ └── quarkus-langchain4j.mustache │ │ │ ├── tests/ │ │ │ │ ├── ai-test.json │ │ │ │ ├── ai-user-test.json │ │ │ │ └── utils.ts │ │ │ ├── utils/ │ │ │ │ ├── JsonWatcher.spec.ts │ │ │ │ ├── JsonWatcher.ts │ │ │ │ ├── Publisher.spec.ts │ │ │ │ ├── Publisher.ts │ │ │ │ ├── RecipeConstants.ts │ │ │ │ ├── arch.ts │ │ │ │ ├── catalogUtils.spec.ts │ │ │ │ ├── catalogUtils.ts │ │ │ │ ├── downloader.ts │ │ │ │ ├── imagesUtils.spec.ts │ │ │ │ ├── imagesUtils.ts │ │ │ │ ├── inferenceUtils.spec.ts │ │ │ │ ├── inferenceUtils.ts │ │ │ │ ├── mcpUtils.ts │ │ │ │ ├── modelsUtils.spec.ts │ │ │ │ ├── modelsUtils.ts │ │ │ │ ├── pathUtils.ts │ │ │ │ ├── podman.spec.ts │ │ │ │ ├── podman.ts │ │ │ │ ├── podsUtils.ts │ │ │ │ ├── ports.ts │ │ │ │ ├── randomUtils.ts │ │ │ │ ├── sha.spec.ts │ │ │ │ ├── sha.ts │ │ │ │ ├── uploader.spec.ts │ │ │ │ ├── uploader.ts │ │ │ │ ├── urldownloader.spec.ts │ │ │ │ ├── urldownloader.ts │ │ │ │ └── utils.ts │ │ │ ├── webviewUtils.spec.ts │ │ │ ├── webviewUtils.ts │ │ │ └── workers/ │ │ │ ├── IWorker.ts │ │ │ ├── WindowsWorker.ts │ │ │ ├── provider/ │ │ │ │ ├── InferenceProvider.spec.ts │ │ │ │ ├── InferenceProvider.ts │ │ │ │ ├── LlamaCppPython.spec.ts │ │ │ │ ├── LlamaCppPython.ts │ │ │ │ ├── OpenVINO.spec.ts │ │ │ │ ├── OpenVINO.ts │ │ │ │ ├── WhisperCpp.spec.ts │ │ │ │ └── WhisperCpp.ts │ │ │ └── uploader/ │ │ │ ├── 
UploaderOptions.ts │ │ │ ├── WSLUploader.spec.ts │ │ │ └── WSLUploader.ts │ │ ├── tsconfig.json │ │ ├── vite.config.js │ │ └── vitest.config.js │ ├── frontend/ │ │ ├── index.html │ │ ├── package.json │ │ ├── src/ │ │ │ ├── App.spec.ts │ │ │ ├── App.svelte │ │ │ ├── Route.svelte │ │ │ ├── app.css │ │ │ ├── index.html │ │ │ ├── lib/ │ │ │ │ ├── ApplicationActions.spec.ts │ │ │ │ ├── ApplicationActions.svelte │ │ │ │ ├── Badge.spec.ts │ │ │ │ ├── Badge.svelte │ │ │ │ ├── Card.svelte │ │ │ │ ├── ContentDetailsLayout.spec.ts │ │ │ │ ├── ContentDetailsLayout.svelte │ │ │ │ ├── ContentDetailsLayoutTest.svelte │ │ │ │ ├── ExpandableMessage.svelte │ │ │ │ ├── FlatMenu.svelte │ │ │ │ ├── Navigation.spec.ts │ │ │ │ ├── Navigation.svelte │ │ │ │ ├── RangeInput.svelte │ │ │ │ ├── RecipeCard.spec.ts │ │ │ │ ├── RecipeCard.svelte │ │ │ │ ├── RecipeCardTags.spec.ts │ │ │ │ ├── RecipeCardTags.svelte │ │ │ │ ├── RecipeCardTags.ts │ │ │ │ ├── RecipeDetails.spec.ts │ │ │ │ ├── RecipeDetails.svelte │ │ │ │ ├── RecipeStatus.spec.ts │ │ │ │ ├── RecipeStatus.svelte │ │ │ │ ├── RecipesCard.spec.ts │ │ │ │ ├── RecipesCard.svelte │ │ │ │ ├── button/ │ │ │ │ │ ├── CopyButton.spec.ts │ │ │ │ │ ├── CopyButton.svelte │ │ │ │ │ └── ListItemButtonIcon.svelte │ │ │ │ ├── conversation/ │ │ │ │ │ ├── ChatMessage.svelte │ │ │ │ │ ├── ConversationActions.svelte │ │ │ │ │ ├── ElapsedTime.svelte │ │ │ │ │ ├── SystemPromptBanner.spec.ts │ │ │ │ │ ├── SystemPromptBanner.svelte │ │ │ │ │ ├── ToolCallMessage.spec.ts │ │ │ │ │ └── ToolCallMessage.svelte │ │ │ │ ├── icons/ │ │ │ │ │ ├── InstructLabIcon.svelte │ │ │ │ │ ├── ModelStatusIcon.spec.ts │ │ │ │ │ ├── ModelStatusIcon.svelte │ │ │ │ │ ├── ModelWhite.svelte │ │ │ │ │ ├── PlaygroundWhite.svelte │ │ │ │ │ └── RemoteModel.svelte │ │ │ │ ├── images/ │ │ │ │ │ ├── DashboardBanner.svelte │ │ │ │ │ ├── PodIcon.svelte │ │ │ │ │ └── VSCodeIcon.svelte │ │ │ │ ├── instructlab/ │ │ │ │ │ ├── AboutInstructLabDiscoverCard.svelte │ │ │ │ │ └── 
AboutInstructLabExploreCard.svelte │ │ │ │ ├── markdown/ │ │ │ │ │ ├── LinkComponent.svelte │ │ │ │ │ └── MarkdownRenderer.svelte │ │ │ │ ├── monaco-editor/ │ │ │ │ │ ├── MonacoEditor.svelte │ │ │ │ │ └── monaco.ts │ │ │ │ ├── notification/ │ │ │ │ │ ├── ContainerConnectionStatusInfo.spec.ts │ │ │ │ │ ├── ContainerConnectionStatusInfo.svelte │ │ │ │ │ ├── ContainerConnectionWrapper.spec.ts │ │ │ │ │ ├── ContainerConnectionWrapper.svelte │ │ │ │ │ ├── GPUEnabledMachine.spec.ts │ │ │ │ │ ├── GPUEnabledMachine.svelte │ │ │ │ │ ├── GPUPromotion.spec.ts │ │ │ │ │ └── GPUPromotion.svelte │ │ │ │ ├── progress/ │ │ │ │ │ ├── TaskItem.spec.ts │ │ │ │ │ ├── TaskItem.svelte │ │ │ │ │ ├── TasksBanner.spec.ts │ │ │ │ │ ├── TasksBanner.svelte │ │ │ │ │ ├── TasksProgress.spec.ts │ │ │ │ │ ├── TasksProgress.svelte │ │ │ │ │ ├── TrackedTasks.spec.ts │ │ │ │ │ └── TrackedTasks.svelte │ │ │ │ ├── select/ │ │ │ │ │ ├── ContainerProviderConnectionSelect.spec.ts │ │ │ │ │ ├── ContainerProviderConnectionSelect.svelte │ │ │ │ │ ├── InferenceRuntimeSelect.spec.ts │ │ │ │ │ ├── InferenceRuntimeSelect.svelte │ │ │ │ │ ├── ModelSelect.spec.ts │ │ │ │ │ ├── ModelSelect.svelte │ │ │ │ │ ├── Select.spec.ts │ │ │ │ │ └── Select.svelte │ │ │ │ └── table/ │ │ │ │ ├── application/ │ │ │ │ │ ├── ApplicationTable.spec.ts │ │ │ │ │ ├── ApplicationTable.svelte │ │ │ │ │ ├── ColumnActions.svelte │ │ │ │ │ ├── ColumnAge.svelte │ │ │ │ │ ├── ColumnModel.spec.ts │ │ │ │ │ ├── ColumnModel.svelte │ │ │ │ │ ├── ColumnPod.svelte │ │ │ │ │ ├── ColumnRecipe.spec.ts │ │ │ │ │ ├── ColumnRecipe.svelte │ │ │ │ │ ├── ColumnRuntime.spec.ts │ │ │ │ │ ├── ColumnRuntime.svelte │ │ │ │ │ └── ColumnStatus.svelte │ │ │ │ ├── instructlab/ │ │ │ │ │ ├── InstructlabColumnAge.svelte │ │ │ │ │ ├── InstructlabColumnModelName.spec.ts │ │ │ │ │ ├── InstructlabColumnModelName.svelte │ │ │ │ │ ├── InstructlabColumnName.svelte │ │ │ │ │ ├── InstructlabColumnRepository.svelte │ │ │ │ │ ├── InstructlabColumnStatus.svelte │ │ │ │ │ └── 
InstructlabColumnTargetModelName.svelte │ │ │ │ ├── model/ │ │ │ │ │ ├── ModelColumnAction.spec.ts │ │ │ │ │ ├── ModelColumnActions.svelte │ │ │ │ │ ├── ModelColumnAge.spec.ts │ │ │ │ │ ├── ModelColumnAge.svelte │ │ │ │ │ ├── ModelColumnLabels.svelte │ │ │ │ │ ├── ModelColumnName.spec.ts │ │ │ │ │ ├── ModelColumnName.svelte │ │ │ │ │ ├── ModelColumnRecipeSelection.svelte │ │ │ │ │ ├── ModelColumnSize.spec.ts │ │ │ │ │ └── ModelColumnSize.svelte │ │ │ │ ├── playground/ │ │ │ │ │ ├── ConversationColumnAction.spec.ts │ │ │ │ │ ├── ConversationColumnAction.svelte │ │ │ │ │ ├── PlaygroundColumnIcon.svelte │ │ │ │ │ ├── PlaygroundColumnModel.svelte │ │ │ │ │ ├── PlaygroundColumnName.svelte │ │ │ │ │ ├── PlaygroundColumnRuntime.spec.ts │ │ │ │ │ └── PlaygroundColumnRuntime.svelte │ │ │ │ └── service/ │ │ │ │ ├── ServiceAction.spec.ts │ │ │ │ ├── ServiceAction.svelte │ │ │ │ ├── ServiceColumnModelName.spec.ts │ │ │ │ ├── ServiceColumnModelName.svelte │ │ │ │ ├── ServiceColumnName.spec.ts │ │ │ │ ├── ServiceColumnName.svelte │ │ │ │ ├── ServiceColumnRuntime.spec.ts │ │ │ │ ├── ServiceColumnRuntime.svelte │ │ │ │ ├── ServiceStatus.spec.ts │ │ │ │ └── ServiceStatus.svelte │ │ │ ├── main.ts │ │ │ ├── models/ │ │ │ │ └── IRouterState.ts │ │ │ ├── pages/ │ │ │ │ ├── Applications.svelte │ │ │ │ ├── CreateService.spec.ts │ │ │ │ ├── CreateService.svelte │ │ │ │ ├── Dashboard.spec.ts │ │ │ │ ├── Dashboard.svelte │ │ │ │ ├── ImportModel.spec.ts │ │ │ │ ├── ImportModel.svelte │ │ │ │ ├── InferenceServerDetails.spec.ts │ │ │ │ ├── InferenceServerDetails.svelte │ │ │ │ ├── InferenceServers.spec.ts │ │ │ │ ├── InferenceServers.svelte │ │ │ │ ├── Model.spec.ts │ │ │ │ ├── Model.svelte │ │ │ │ ├── Models.spec.ts │ │ │ │ ├── Models.svelte │ │ │ │ ├── NewInstructLabSession.spec.ts │ │ │ │ ├── NewInstructLabSession.svelte │ │ │ │ ├── Playground.spec.ts │ │ │ │ ├── Playground.svelte │ │ │ │ ├── PlaygroundCreate.spec.ts │ │ │ │ ├── PlaygroundCreate.svelte │ │ │ │ ├── Playgrounds.spec.ts │ │ │ 
│ ├── Playgrounds.svelte │ │ │ │ ├── Preferences.svelte │ │ │ │ ├── Recipe.spec.ts │ │ │ │ ├── Recipe.svelte │ │ │ │ ├── Recipes.spec.ts │ │ │ │ ├── Recipes.svelte │ │ │ │ ├── StartRecipe.spec.ts │ │ │ │ ├── StartRecipe.svelte │ │ │ │ ├── TuneSessions.spec.ts │ │ │ │ ├── TuneSessions.svelte │ │ │ │ ├── applications.ts │ │ │ │ ├── instructlab/ │ │ │ │ │ ├── AboutInstructLab.spec.ts │ │ │ │ │ ├── AboutInstructLab.svelte │ │ │ │ │ ├── StartInstructLabContainer.spec.ts │ │ │ │ │ └── StartInstructLabContainer.svelte │ │ │ │ ├── llama-stack/ │ │ │ │ │ ├── StartLlamaStackContainer.spec.ts │ │ │ │ │ └── StartLlamaStackContainer.svelte │ │ │ │ └── server-information/ │ │ │ │ ├── LocalServer.spec.ts │ │ │ │ └── LocalServer.svelte │ │ │ ├── stores/ │ │ │ │ ├── application-states.ts │ │ │ │ ├── catalog.ts │ │ │ │ ├── containerProviderConnections.ts │ │ │ │ ├── conversations.ts │ │ │ │ ├── extensionConfiguration.ts │ │ │ │ ├── inferenceServers.ts │ │ │ │ ├── instructlabSessions.ts │ │ │ │ ├── localRepositories.ts │ │ │ │ ├── modelsInfo.spec.ts │ │ │ │ ├── modelsInfo.ts │ │ │ │ ├── rpcReadable.spec.ts │ │ │ │ ├── rpcReadable.ts │ │ │ │ ├── snippetLanguages.ts │ │ │ │ └── tasks.ts │ │ │ └── utils/ │ │ │ ├── categoriesUtils.ts │ │ │ ├── client.ts │ │ │ ├── dimensions.ts │ │ │ ├── fileUtils.ts │ │ │ ├── localRepositoriesUtils.ts │ │ │ ├── printers.ts │ │ │ ├── taskUtils.ts │ │ │ └── versionControlUtils.ts │ │ ├── tailwind.config.cjs │ │ ├── tsconfig.json │ │ └── vite.config.js │ └── shared/ │ ├── __mocks__/ │ │ └── @podman-desktop/ │ │ └── api.js │ ├── src/ │ │ ├── InstructlabAPI.ts │ │ ├── LlamaStackAPI.ts │ │ ├── Messages.ts │ │ ├── StudioAPI.ts │ │ ├── messages/ │ │ │ ├── MessageProxy.spec.ts │ │ │ └── MessageProxy.ts │ │ ├── models/ │ │ │ ├── FilterRecipesResult.ts │ │ │ ├── IApplicationCatalog.ts │ │ │ ├── IApplicationState.ts │ │ │ ├── ICategory.ts │ │ │ ├── IContainerConnectionInfo.ts │ │ │ ├── IExtensionConfiguration.ts │ │ │ ├── IGPUInfo.ts │ │ │ ├── IInference.spec.ts │ │ 
│ ├── IInference.ts │ │ │ ├── ILocalModelInfo.ts │ │ │ ├── ILocalRepository.ts │ │ │ ├── IModelInfo.ts │ │ │ ├── IModelOptions.ts │ │ │ ├── IModelResponse.ts │ │ │ ├── IPlaygroundMessage.ts │ │ │ ├── IPlaygroundV2.ts │ │ │ ├── IPodman.ts │ │ │ ├── IRecipe.ts │ │ │ ├── IRecipeModelIndex.ts │ │ │ ├── ITask.ts │ │ │ ├── InferenceServerConfig.ts │ │ │ ├── McpSettings.ts │ │ │ ├── RequestOptions.ts │ │ │ ├── instructlab/ │ │ │ │ ├── IInstructlabContainerConfiguration.ts │ │ │ │ ├── IInstructlabContainerInfo.ts │ │ │ │ └── IInstructlabSession.ts │ │ │ └── llama-stack/ │ │ │ ├── LlamaStackContainerConfiguration.ts │ │ │ └── LlamaStackContainerInfo.ts │ │ └── uri/ │ │ ├── Uri.spec.ts │ │ └── Uri.ts │ ├── tsconfig.json │ ├── vite.config.js │ └── vitest.config.js ├── pnpm-workspace.yaml ├── tests/ │ ├── playwright/ │ │ ├── package.json │ │ ├── playwright.config.ts │ │ ├── src/ │ │ │ ├── ai-lab-extension.spec.ts │ │ │ ├── model/ │ │ │ │ ├── ai-lab-app-details-page.ts │ │ │ │ ├── ai-lab-base-page.ts │ │ │ │ ├── ai-lab-creating-model-service-page.ts │ │ │ │ ├── ai-lab-dashboard-page.ts │ │ │ │ ├── ai-lab-local-server-page.ts │ │ │ │ ├── ai-lab-model-catalog-page.ts │ │ │ │ ├── ai-lab-model-llamastack-page.ts │ │ │ │ ├── ai-lab-model-service-page.ts │ │ │ │ ├── ai-lab-navigation-bar.ts │ │ │ │ ├── ai-lab-playground-details-page.ts │ │ │ │ ├── ai-lab-playgrounds-page.ts │ │ │ │ ├── ai-lab-recipes-catalog-page.ts │ │ │ │ ├── ai-lab-running-apps-page.ts │ │ │ │ ├── ai-lab-service-details-page.ts │ │ │ │ ├── ai-lab-start-recipe-page.ts │ │ │ │ ├── ai-lab-try-instructlab-page.ts │ │ │ │ ├── podman-extension-ai-lab-details-page.ts │ │ │ │ └── preferences-extension-ai-lab-page.ts │ │ │ └── utils/ │ │ │ ├── aiLabHandler.ts │ │ │ └── webviewHandler.ts │ │ └── tsconfig.json │ └── tmt/ │ ├── plans/ │ │ ├── ai-lab-e2e-plan-default.fmf │ │ └── ai-lab-e2e-plan-gpu.fmf │ ├── scripts/ │ │ ├── create-results.sh │ │ └── install-podman.sh │ └── tests/ │ ├── e2e-test.fmf │ ├── instructlab-test.fmf 
│ └── smoke-test.fmf ├── tools/ │ └── compute-model-sizes.sh └── types/ ├── additional.d.ts ├── mustache.d.ts ├── podman-desktop-api.d.ts └── postman-code-generators.d.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ node_modules ================================================ FILE: .editorconfig ================================================ # EditorConfig is awesome: http://EditorConfig.org # https://github.com/jokeyrhyme/standard-editorconfig # top-most EditorConfig file root = true # defaults [*] charset = utf-8 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true indent_size = 2 indent_style = space [*.md] trim_trailing_whitespace = false ================================================ FILE: .fmf/version ================================================ 1 ================================================ FILE: .gitattributes ================================================ * text=auto eol=lf ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug 🐞 description: Report a bug report type: bug body: - type: markdown attributes: value: | Before opening a bug report, please search for the behaviour in the existing issues. --- Thank you for taking the time to file a bug report. To address this bug as fast as possible, we need some information. - type: textarea id: bug-description attributes: label: Bug description description: What happened? validations: required: true - type: input id: os attributes: label: Operating system description: "Which operating system are you on? Please provide the version as well. If you are on a Mac, please specify Apple silicon or Intel." 
placeholder: "macOS Ventura 13.4 (Arm), Windows 11" validations: required: true - type: dropdown id: install attributes: label: Installation Method description: "How did you install AI Lab ?" options: - "from `ghcr.io/containers/podman-desktop-extension-ai-lab` container image" - "from Podman-Desktop extension page" - "Other" - type: dropdown id: version attributes: label: Version description: What version of the software are you running? options: - "next (development version)" - "1.3.x" - "1.2.x" - "1.1.x" - "1.0.x" validations: required: true - type: textarea id: steps attributes: label: Steps to reproduce description: What steps do we need to take to reproduce this error? - type: textarea id: logs attributes: label: Relevant log output description: If applicable, provide relevant log output. render: shell - type: textarea id: additional-context attributes: label: Additional context description: Add any other context or screenshots here. ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false ================================================ FILE: .github/ISSUE_TEMPLATE/epic.yml ================================================ name: Epic ⚡ description: A high-level feature type: epic body: - type: markdown attributes: value: | Epics are normally created by the development team, to group a set of related features and plan work across multiple sprints. The features this epic includes are referenced with the text of the epic. - type: textarea id: domain attributes: label: Epic domain description: A clear and concise description of the feature area or domain that this epic will address. placeholder: AI-Lab should support [...] validations: required: true - type: textarea id: additional-context attributes: label: Additional context description: Add any other context or screenshots here. 
================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: Feature 💡 description: A request, idea, or new functionality type: feature body: - type: markdown attributes: value: | Before opening a feature request, please search for potential existing issues. --- Thank you for taking the time to file a feature request, we appreciate and value your time to help the project! - type: textarea id: problem attributes: label: Is your feature request related to a problem? Please describe description: A clear and concise description of what the problem is. placeholder: I'm always frustrated when [...] validations: required: true - type: textarea id: solution attributes: label: Describe the solution you'd like description: A clear and concise description of what you want to happen. validations: required: true - type: textarea id: alternatives attributes: label: Describe alternatives you've considered description: A clear and concise description of any alternative solutions or features you've considered. - type: textarea id: additional-context attributes: label: Additional context description: Add any other context or screenshots here. ================================================ FILE: .github/ISSUE_TEMPLATE/ux-request.yaml ================================================ name: UX Request description: UX Request Form type: UX (design spec) labels: [UX/UI Issue, Graphic design] body: - type: markdown attributes: value: | Before opening a UX request, please search for existing issues. --- - type: textarea id: UX-description attributes: label: UX Description description: Describe the request validations: required: true - type: dropdown id: request-type attributes: label: Request type description: "What type of request is this?" 
options: - "A logo design" - "An icon" - "An infographic/chart" - "a template or design for printed materials" - "Swag design" - "Graphic design not covered by the above" - type: dropdown id: user-experience attributes: label: User Experience Request type description: "What type of request is this?" options: - "UX analysis/suggestions for improvement" - "User research" - "User testing" - "Application mockups/designs" - "Website mockups/designs" - "Something else UX-related" - type: textarea id: Contacts attributes: label: Engineering Contact description: Who is the primary engineer the design team can speak with about this issue? - type: textarea id: Deadlne attributes: label: Deadline for request description: When do you need this? If this is for an event, please let us know the date of the evnt and any lead time you need to get materials produced. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ### What does this PR do? ### Screenshot / video of UI ### What issues does this PR fix or reference? ### How to test this PR? ================================================ FILE: .github/dependabot.yml ================================================ # Set update schedule for GitHub Actions version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" open-pull-requests-limit: 10 - package-ecosystem: "npm" directory: "/" schedule: interval: daily open-pull-requests-limit: 10 groups: fortawesome: applies-to: version-updates patterns: - "@fortawesome/*" ai-sdk: applies-to: version-updates patterns: - "@ai-sdk/mcp" - "ai" ================================================ FILE: .github/workflows/ai-lab-e2e-nightly-windows.yaml ================================================ # # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: Podman Desktop AI Lab E2E Nightly run-name: Podman Desktop AI Lab E2E Nightly ${{ github.event_name == 'push' && '[Recipe change]' || '' }} on: schedule: - cron: '0 2 * * *' push: paths: - 'packages/backend/src/assets/ai.json' workflow_dispatch: inputs: podman_desktop_repo_args: default: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main' description: 'Podman Desktop repo fork and branch' type: string required: true ext_repo_options: default: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main' description: 'Podman Desktop Extension repo, fork and branch' type: string required: true ext_tests_options: default: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0' description: 'E2E tests options in format VAR1=xxx,VAR2=true,VAR3=15 etc.' type: string required: true npm_target: default: 'test:e2e' description: 'npm target to run tests' type: string required: true podman_version: default: 'latest' description: 'Podman version (use "latest" to auto-fetch latest release, or specify version like "v5.6.1")' type: string required: true podman_options: default: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0' description: 'Podman machine configuration options, no spaces' type: string required: true env_vars: default: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true' description: 'Env. Variables passed into target machine, ie: VAR1=xxx,VAR2=true... 
use EXT_TEST_RAG_CHATBOT=1 to run RAG Chatbot test' type: string required: true pde2e_image_version: default: 'v0.0.3' description: 'PDE2E runner, builder, podman image versions' type: string required: true mapt_params: description: | **Create instance (leave empty to use repo secrets/variables)** **Format:** IMAGE=xxx;VERSION_TAG=xxx;CPUS=xxx;MEMORY=xxx;EXCLUDED_REGIONS=xxx **Example:** IMAGE=quay.io/redhat-developer/mapt;VERSION_TAG=v0.9.8;CPUS=4;MEMORY=32;EXCLUDED_REGIONS="westindia,centralindia,southindia,australiacentral,australiacentral2,australiaeast,australiasoutheast,southafricanorth,southafricawest" required: false type: string jobs: windows: timeout-minutes: 180 name: windows-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: windows-version: ['11'] windows-featurepack: ['25h2-ent'] steps: - name: Fetch latest Podman version id: fetch-podman uses: redhat-actions/podman-install/.github/actions/fetch-latest-podman-version-windows@6b757b792b67ec663765a4f2ca36226e12b2f4cd with: version_input: ${{ github.event.inputs.podman_version || 'latest' }} file_type: 'setup.exe' github_token: ${{ secrets.GITHUB_TOKEN }} - name: Set the default env. 
variables env: CI: true DEFAULT_PODMAN_DESKTOP_REPO_ARGS: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main' DEFAULT_NPM_TARGET: 'test:e2e' DEFAULT_ENV_VARS: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true' DEFAULT_PODMAN_OPTIONS: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0' DEFAULT_EXT_TESTS_OPTIONS: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0' DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main' DEFAULT_PDE2E_IMAGE_VERSION: 'v0.0.3' run: | echo "NPM_TARGET=${{ github.event.inputs.npm_target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV echo "ENV_VARS=${{ github.event.inputs.env_vars || env.DEFAULT_ENV_VARS }}" >> $GITHUB_ENV echo "PODMAN_URL=${{ steps.fetch-podman.outputs.download_url }}" >> $GITHUB_ENV echo "PDE2E_IMAGE_VERSION=${{ github.event.inputs.pde2e_image_version || env.DEFAULT_PDE2E_IMAGE_VERSION }}" >> $GITHUB_ENV echo "${{ github.event.inputs.podman_desktop_repo_args || env.DEFAULT_PODMAN_DESKTOP_REPO_ARGS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PD_"kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ github.event.inputs.ext_tests_options || env.DEFAULT_EXT_TESTS_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ github.event.inputs.podman_options || env.DEFAULT_PODMAN_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PODMAN_"kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ github.event.inputs.ext_repo_options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV # For mapt_params, use repo variables directly if input is empty if [ -n "${{ github.event.inputs.mapt_params }}" ]; then mapt_params="${{ github.event.inputs.mapt_params }}" else mapt_params="IMAGE=${{ vars.MAPT_IMAGE }};VERSION_TAG=${{ vars.MAPT_VERSION_TAG }};CPUS=${{ vars.MAPT_CPUS }};MEMORY=${{ 
vars.MAPT_MEMORY }};EXCLUDED_REGIONS=\"${{ vars.MAPT_EXCLUDED_REGIONS }}\"" fi echo "$mapt_params" | awk -F ';' '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "MAPT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV - name: Create instance uses: podman-desktop/e2e/.github/actions/create-instance@213a276952d746324895f63cea0b23083013990f with: mapt-image: ${{ env.MAPT_IMAGE || '' }} mapt-version: ${{ env.MAPT_VERSION_TAG || '' }} windows-version: ${{ matrix.windows-version }} windows-featurepack: ${{ matrix.windows-featurepack }} cpus: ${{ env.MAPT_CPUS || '' }} memory: ${{ env.MAPT_MEMORY || '' }} excluded-regions: ${{ env.MAPT_EXCLUDED_REGIONS || '' }} arm-tenant-id: ${{ secrets.ARM_TENANT_ID }} arm-subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }} arm-client-id: ${{ secrets.ARM_CLIENT_ID }} arm-client-secret: ${{ secrets.ARM_CLIENT_SECRET }} - name: Check instance system info uses: podman-desktop/e2e/.github/actions/instance-system-info@3548105f45def129d5e3aaa5a3d922e09ac892d9 - name: Emulate X session uses: podman-desktop/e2e/.github/actions/emulate-x-session@3548105f45def129d5e3aaa5a3d922e09ac892d9 - name: Download Podman, do not initialize uses: podman-desktop/e2e/.github/actions/download-podman-nightly@952cafee20ca82b1ce48b29c848bac1c31062245 with: podman-image-tag: ${{ env.PDE2E_IMAGE_VERSION }} podman-download-url: ${{ env.PODMAN_URL }} - name: Build Podman Desktop Electron Inspect Enabled binary uses: podman-desktop/e2e/.github/actions/build-podman-desktop@0c1f0a035e0949941fd6abf959ab556ceec13f03 with: fork: ${{ env.PD_FORK }} branch: ${{ env.PD_BRANCH }} env-vars: ${{ env.ENV_VARS }} - name: Run Podman Desktop Playwright E2E tests uses: podman-desktop/e2e/.github/actions/run-playwright-test@15b800edab941d394b32aaaa3f7961bb7db7ec3a with: pde2e-runner-tag: ${{ env.PDE2E_IMAGE_VERSION }} podman-desktop-path: true fork-repo: ${{ env.PD_FORK }} branch-name: ${{ env.PD_BRANCH }} ext-repo: ${{ env.EXT_REPO }} ext-fork: ${{ env.EXT_FORK }} ext-branch: ${{ env.EXT_BRANCH }} 
ext-tests: ${{ env.EXT_RUN_TESTS_FROM_EXTENSION }} npm-target: ${{ env.NPM_TARGET }} podman-init: ${{ env.PODMAN_INIT }} podman-start: ${{ env.PODMAN_START }} rootful: ${{ env.PODMAN_ROOTFUL }} user-networking: ${{ env.PODMAN_NETWORKING }} podman-provider: 'wsl' env-vars: ${{ env.ENV_VARS }} ci-bot-token: ${{ secrets.PODMAN_DESKTOP_BOT_TOKEN }} - name: Destroy instance if: always() uses: podman-desktop/e2e/.github/actions/destroy-instance@36e440f2ac18193214f4ffa8f7f1c4c0cb8c9446 with: mapt-image: ${{ env.MAPT_IMAGE }} mapt-version: ${{ env.MAPT_VERSION_TAG }} arm-tenant-id: ${{ secrets.ARM_TENANT_ID }} arm-subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }} arm-client-id: ${{ secrets.ARM_CLIENT_ID }} arm-client-secret: ${{ secrets.ARM_CLIENT_SECRET }} - name: Publish Test Report uses: mikepenz/action-junit-report@v6 if: always() with: annotate_only: true fail_on_failure: true include_passed: true detailed_summary: true require_tests: true report_paths: '**/*results.xml' - name: Upload test artifacts uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }} path: | results/* !./**/*.gguf !./**/*.bin !./**/output/videos/* !./**/output/traces/* - name: Upload test videos uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-videos path: ./**/output/videos/* - name: Upload test traces uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-traces path: ./**/output/traces/* ================================================ FILE: .github/workflows/build-next.yaml ================================================ # # Copyright (C) 2023-2024 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: CI on: push: branches: - 'main' jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v6.0.2 - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false - uses: actions/setup-node@v6 with: node-version: 24 cache: 'pnpm' - name: Execute pnpm run: pnpm install - name: Run Build run: pnpm build - name: Login to ghcr.io run: podman login --username ${{ github.repository_owner }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io - name: Publish Image id: publish-image run: | IMAGE_NAME=ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab IMAGE_NIGHTLY=${IMAGE_NAME}:nightly IMAGE_SHA=${IMAGE_NAME}:${GITHUB_SHA} podman build -t $IMAGE_NIGHTLY . podman push $IMAGE_NIGHTLY podman tag $IMAGE_NIGHTLY $IMAGE_SHA podman push $IMAGE_SHA ================================================ FILE: .github/workflows/compute-model-sizes.yml ================================================ # This is a basic workflow that is manually triggered name: Compute model sizes # Controls when the action will run. Workflow runs when manually triggered using the UI # or API. 
on: workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "greet" compute: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: - uses: actions/checkout@v6.0.2 # Runs a single command using the runners shell - name: Compute model size run: ./tools/compute-model-sizes.sh ================================================ FILE: .github/workflows/e2e-main-tf.yaml ================================================ # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: PD AI Lab E2E Nightly Testing Farm on: schedule: - cron: '0 0 * * *' workflow_dispatch: inputs: podman_version: default: 'latest' description: 'Podman version to install (e.g., "5.5.2", "5.6.0~rc1"). Use "latest" for stable or "nightly" for the latest development build.' 
type: string required: true npm_target: description: npm tests target type: choice default: 'e2e' options: - e2e - smoke - instructlab plan: description: plans to run type: choice default: 'default' options: - default - gpu jobs: pd-ai-lab-e2e-testing-farm: name: pd-e2e-testing-farm-ci runs-on: ubuntu-latest timeout-minutes: 180 strategy: fail-fast: false matrix: fedora-version: ['Fedora-42', 'Fedora-43'] plan: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.plan != '' && fromJSON(format('["{0}"]', github.event.inputs.plan)) || fromJSON('["default", "gpu"]') }} steps: - name: Set the default env. variables env: DEFAULT_NPM_TARGET: 'smoke' DEFAULT_PODMAN_VERSION: 'latest' DEFAULT_NODE_VERSION: 'v24.11.1' run: | echo "NPM_TARGET=${{ github.event.inputs.npm_target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV echo "PLAN=${{ matrix.plan }}" >> $GITHUB_ENV echo "PODMAN_VERSION=${{ github.event.inputs.podman_version || env.DEFAULT_PODMAN_VERSION }}" >> $GITHUB_ENV echo "NODE_VERSION=${{ vars.NODE_VERSION || env.DEFAULT_NODE_VERSION }}" >> $GITHUB_ENV - name: Run Podman Desktop Playwright E2E tests on Testing Farm CI id: run-e2e-tf uses: sclorg/testing-farm-as-github-action@b23f0de29ac969d12411215a983da264b4ced149 #v4.2.0 with: api_key: ${{ secrets.TF_TOKEN }} create_github_summary: "false" compose: ${{ matrix.fedora-version }} tmt_plan_filter: 'name:/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}' variables: COMPOSE=${{ matrix.fedora-version }};ARCH=x86_64;PODMAN_VERSION=${{ env.PODMAN_VERSION }};NODE_VERSION=${{ env.NODE_VERSION }} - name: Extract Testing Farm work ID and base URL if: always() run: | TF_ARTIFACTS_URL="${{ steps.run-e2e-tf.outputs.test_log_url }}" TF_DEFAULT_JUNIT_DEFAULT="${TF_ARTIFACTS_URL}/results-junit.xml" curl -o results-junit.xml "$TF_DEFAULT_JUNIT_DEFAULT" TF_WORK_ID=$(grep -o 'work-${{ env.NPM_TARGET }}[^/"]*' results-junit.xml | head -1) echo "TF_WORK_ID=$TF_WORK_ID" >> $GITHUB_ENV echo 
"TF_ARTIFACTS_URL=$TF_ARTIFACTS_URL" >> $GITHUB_ENV - name: Download Playwright JUnit report from Testing Farm if: always() run: | TF_PLAYWRIGHT_JUNIT_URL="${{ env.TF_ARTIFACTS_URL }}/${{ env.TF_WORK_ID }}/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}/execute/data/guest/default-0/tests/tmt/tests/${{ env.NPM_TARGET }}-test-1/data/junit-results.xml" curl -o junit-playwright-results.xml "$TF_PLAYWRIGHT_JUNIT_URL" - name: Publish test report to PR if: always() uses: mikepenz/action-junit-report@5b7ee5a21e8674b695313d769f3cbdfd5d4d53a4 #v6.0.0 with: fail_on_failure: true include_passed: true detailed_summary: true annotate_only: true require_tests: true report_paths: '**/junit-playwright-results.xml' - name: Download test artifacts from Testing Farm if: failure() run: | mkdir -p results TF_TEST_DATA_URL="${{ env.TF_ARTIFACTS_URL }}/${{ env.TF_WORK_ID }}/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}/execute/data/guest/default-0/tests/tmt/tests/${{ env.NPM_TARGET }}-test-1/data" TF_TRACES_URL="${TF_TEST_DATA_URL}/traces/" TF_VIDEOS_URL="${TF_TEST_DATA_URL}/videos/" echo "Downloading traces" wget \ --recursive \ --no-parent \ --no-host-directories \ --cut-dirs=10 \ --reject "index.html*" \ --directory-prefix=results \ "$TF_TRACES_URL" echo "Downloading videos" wget \ --recursive \ --no-parent \ --no-host-directories \ --cut-dirs=10 \ --reject "index.html*" \ --directory-prefix=results \ "$TF_VIDEOS_URL" - name: Upload test artifacts if: always() uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ai-lab-testing-farm-artifacts-${{ matrix.fedora-version }}-${{ env.PLAN }} path: | results/* **/junit-playwright-results.xml ================================================ FILE: .github/workflows/e2e-main.yaml ================================================ # # Copyright (C) 2024 Red Hat, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: e2e-tests-main on: push: branches: [main] schedule: - cron: '0 2 * * *' workflow_dispatch: inputs: podman_desktop_repo_args: default: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main' description: 'Podman Desktop repo fork and branch' type: string required: true ext_repo_options: default: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main' description: 'Podman Desktop Extension repo, fork and branch' type: string required: true jobs: e2e-tests: name: Run E2E tests ${{ github.event_name == 'schedule' && '[nightly]' || '' }} runs-on: ubuntu-24.04 steps: - name: Set default env variables env: DEFAULT_PODMAN_DESKTOP_REPO_ARGS: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main' DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main' run: | echo "${{ github.event.inputs.podman_desktop_repo_args || env.DEFAULT_PODMAN_DESKTOP_REPO_ARGS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PD_"kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ github.event.inputs.ext_repo_options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV - uses: actions/checkout@v6.0.2 name: Checkout AI Lab - Workflow Dispatch if: github.event_name == 'workflow_dispatch' with: repository: ${{ env.EXT_FORK }}/${{ env.EXT_REPO }} ref: ${{ env.EXT_BRANCH }} path: 
podman-desktop-extension-ai-lab - uses: actions/checkout@v6.0.2 name: Checkout AI Lab - Push or Schedule if: github.event_name == 'push' || github.event_name == 'schedule' with: path: podman-desktop-extension-ai-lab - uses: actions/checkout@v6.0.2 name: Checkout Podman Desktop with: repository: ${{ env.PD_FORK }}/${{ env.PD_REPO }} ref: ${{ env.PD_BRANCH }} path: podman-desktop - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false package_json_file: ./podman-desktop/package.json - uses: actions/setup-node@v6 with: node-version: 24 cache: 'pnpm' cache-dependency-path: | ./podman-desktop ./podman-desktop-extension-ai-lab - name: Update podman run: | echo "ubuntu version from kubic repository to install podman we need (v5)" ubuntu_version='23.10' echo "Add unstable kubic repo into list of available sources and get the repo key" sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - echo "Updating database of packages..." sudo apt-get update -qq echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}" sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1 echo "install criu manually from static location" curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb echo "installing/update podman package..." 
sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \ sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \ curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \ sudo apt-get update && \ sudo apt-get -y install podman; } podman version - name: Revert unprivileged user namespace restrictions in Ubuntu 24.04 run: | # allow unprivileged user namespace sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 - name: Set cgroup_manager to 'cgroupfs' instead of systemd run: | mkdir -p ~/.config/containers cat << EOT >> ~/.config/containers/containers.conf [engine] cgroup_manager="cgroupfs" EOT podman info - name: Execute pnpm working-directory: ./podman-desktop run: pnpm install --frozen-lockfile - name: Build Podman Desktop for E2E tests working-directory: ./podman-desktop run: pnpm test:e2e:build - name: Ensure getting current HEAD version of the test framework working-directory: ./podman-desktop-extension-ai-lab/tests/playwright run: pnpm add -D @podman-desktop/tests-playwright@next - name: Execute pnpm in AI Lab Extension working-directory: ./podman-desktop-extension-ai-lab run: pnpm install - name: Build Image working-directory: ./podman-desktop-extension-ai-lab id: build-image run: | pnpm build podman build -t local_ai_lab_image ./ CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "") mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/ podman rm -f $CONTAINER_ID podman rmi -f localhost/local_ai_lab_image:latest - name: Free up disk space uses: 
podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077 - name: Run All E2E tests working-directory: ./podman-desktop-extension-ai-lab env: PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop EXTENSION_PREINSTALLED: true run: pnpm test:e2e - name: Publish Test Report uses: mikepenz/action-junit-report@v6 if: always() with: annotate_only: true fail_on_failure: true include_passed: true detailed_summary: true require_tests: true report_paths: '**/*results.xml' - uses: actions/upload-artifact@v7 if: always() with: name: e2e-tests path: | ./**/tests/**/output/ !./**/*.gguf !./**/*.bin !./**/output/videos/* !./**/output/traces/* - name: Upload test videos uses: actions/upload-artifact@v7 if: always() with: name: e2e-tests-videos path: ./**/output/videos/* - name: Upload test traces uses: actions/upload-artifact@v7 if: always() with: name: e2e-tests-traces path: ./**/output/traces/* ================================================ FILE: .github/workflows/llama-stack-playground.yaml ================================================ # # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: llama-stack-playground on: workflow_dispatch: inputs: version: description: 'llama-stack tag to use (e.g. 
main, v0.2.8,...)' type: string required: true jobs: publish: name: publish runs-on: ubuntu-24.04 steps: - uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 #v5.0.1 with: repository: meta-llama/llama-stack ref: ${{ github.event.inputs.version }} - name: Install qemu dependency run: | sudo apt-get update sudo apt-get install -y qemu-user-static - name: Build manifest and images run: | podman manifest create quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }} podman build --platform linux/amd64,linux/arm64 llama_stack/distribution/ui --manifest quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }} - name: Login to quay.io run: podman login quay.io --username ${{ secrets.QUAY_USERNAME }} --password ${{ secrets.QUAY_PASSWORD }} - name: Push manifest and images to quay.io run: podman manifest push quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }} ================================================ FILE: .github/workflows/pr-check.yaml ================================================ # # Copyright (C) 2024 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 name: pr-check on: [pull_request] jobs: lint-format-unit: name: linter, formatters and unit tests / ${{ matrix.os }} runs-on: ${{ matrix.os }} timeout-minutes: 40 strategy: fail-fast: false matrix: os: [windows-2022, ubuntu-22.04, macos-14] steps: - uses: actions/checkout@v6.0.2 - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false - uses: actions/setup-node@v6 with: node-version: 24 cache: 'pnpm' - name: Execute pnpm run: pnpm install - name: Run linter run: pnpm lint:check - name: Run formatter run: pnpm format:check - name: Run unit tests run: pnpm test:unit - name: Run typecheck run: pnpm typecheck - name: Run svelte check run: pnpm svelte:check # Check we don't have changes in git - name: Check no changes in git if: ${{ matrix.os=='ubuntu-22.04'}} run: | if ! git diff --exit-code; then echo "Found changes in git" exit 1 fi e2e-pr-check: name: e2e tests smoke runs-on: ubuntu-24.04 env: SKIP_INSTALLATION: true steps: - uses: actions/checkout@v6.0.2 with: path: podman-desktop-extension-ai-lab # Set up pnpm - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false package_json_file: ./podman-desktop-extension-ai-lab/package.json # Install Node.js - uses: actions/setup-node@v6 with: node-version: 24 # Checkout podman desktop - uses: actions/checkout@v6.0.2 with: repository: containers/podman-desktop ref: main path: podman-desktop - name: Update podman run: | echo "ubuntu version from kubic repository to install podman we need (v5)" ubuntu_version='23.10' echo "Add unstable kubic repo into list of available sources and get the repo key" sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - echo 
"Updating database of packages..." sudo apt-get update -qq echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}" sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1 echo "install criu manually from static location" curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb echo "installing/update podman package..." sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \ sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \ curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \ sudo apt-get update && \ sudo apt-get -y install podman; } podman version - name: Revert unprivileged user namespace restrictions in Ubuntu 24.04 run: | # allow unprivileged user namespace sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 - name: Set cgroup_manager to 'cgroupfs' instead of systemd run: | mkdir -p ~/.config/containers cat << EOT >> ~/.config/containers/containers.conf [engine] cgroup_manager="cgroupfs" EOT podman info - name: Install pnpm deps and build Podman Desktop working-directory: ./podman-desktop run: | pnpm install --frozen-lockfile pnpm test:e2e:build - name: Ensure getting current HEAD version of the test framework working-directory: ./podman-desktop-extension-ai-lab/tests/playwright run: | # workaround for https://github.com/containers/podman-desktop-extension-bootc/issues/712 version=$(npm view @podman-desktop/tests-playwright@next version) echo "Version of @podman-desktop/tests-playwright to be used: $version" jq --arg version "$version" '.devDependencies."@podman-desktop/tests-playwright" = $version' 
package.json > package.json_tmp && mv package.json_tmp package.json - name: Execute pnpm in AI Lab Extension working-directory: ./podman-desktop-extension-ai-lab run: pnpm install --no-frozen-lockfile - name: Build Image working-directory: ./podman-desktop-extension-ai-lab id: build-image run: | pnpm build podman build -t local_ai_lab_image ./ CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "") mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/ podman rm -f $CONTAINER_ID podman rmi -f localhost/local_ai_lab_image:latest - name: Free up disk space uses: podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077 - name: Run E2E Smoke tests working-directory: ./podman-desktop-extension-ai-lab env: PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop EXTENSION_PREINSTALLED: true run: pnpm test:e2e:smoke - name: Publish Test Report uses: mikepenz/action-junit-report@v6 if: always() with: annotate_only: true fail_on_failure: true include_passed: true detailed_summary: true require_tests: true report_paths: '**/*results.xml' - uses: actions/upload-artifact@v7 if: always() with: name: e2e-pr-check path: | ./**/tests/**/output/ !./**/*.gguf !./**/*.bin !./**/output/videos/* !./**/output/traces/* - name: Upload test videos uses: actions/upload-artifact@v7 if: always() with: name: e2e-pr-check-videos path: ./**/output/videos/* - name: Upload test traces uses: actions/upload-artifact@v7 if: always() with: name: e2e-pr-check-traces path: ./**/output/traces/* ================================================ FILE: .github/workflows/ramalama.yaml ================================================ # # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 name: ramalama on: schedule: - cron: '0 2 * * *' workflow_dispatch: inputs: tag: default: 'latest' description: 'Ramalama images tag to use' type: string required: true jobs: e2e-check: name: e2e tests runs-on: ubuntu-24.04 env: SKIP_INSTALLATION: true steps: - uses: actions/checkout@v6.0.2 with: path: podman-desktop-extension-ai-lab # Set up pnpm - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false package_json_file: ./podman-desktop-extension-ai-lab/package.json # Install Node.js - uses: actions/setup-node@v6 with: node-version: 24 # Checkout podman desktop - uses: actions/checkout@v6.0.2 with: repository: podman-desktop/podman-desktop ref: main path: podman-desktop - name: Update podman run: | echo "ubuntu version from kubic repository to install podman we need (v5)" ubuntu_version='23.10' echo "Add unstable kubic repo into list of available sources and get the repo key" sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - echo "Updating database of packages..." 
sudo apt-get update -qq echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}" sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1 echo "install criu manually from static location" curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb echo "installing/update podman package..." sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \ sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \ curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \ sudo apt-get update && \ sudo apt-get -y install podman; } podman version - name: Revert unprivileged user namespace restrictions in Ubuntu 24.04 run: | # allow unprivileged user namespace sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 - name: Set cgroup_manager to 'cgroupfs' instead of systemd run: | mkdir -p ~/.config/containers cat << EOT >> ~/.config/containers/containers.conf [engine] cgroup_manager="cgroupfs" EOT podman info - name: Install pnpm deps and build Podman Desktop working-directory: ./podman-desktop run: | pnpm install --frozen-lockfile pnpm test:e2e:build - name: Ensure getting current HEAD version of the test framework working-directory: ./podman-desktop-extension-ai-lab/tests/playwright run: | # workaround for https://github.com/podman-desktop/podman-desktop-extension-bootc/issues/712 version=$(npm view @podman-desktop/tests-playwright@next version) echo "Version of @podman-desktop/tests-playwright to be used: $version" jq --arg version "$version" '.devDependencies."@podman-desktop/tests-playwright" = $version' package.json > package.json_tmp && 
mv package.json_tmp package.json - name: Execute pnpm in AI Lab Extension working-directory: ./podman-desktop-extension-ai-lab run: pnpm install --no-frozen-lockfile - name: Update ramalama image references in AI Lab Extension working-directory: ./podman-desktop-extension-ai-lab run: sed -i -E "s/(@sha256:[0-9a-f]+)/:${{ github.event_name != 'workflow_dispatch' && 'latest' || github.event.inputs.tag }}/g" packages/backend/src/assets/inference-images.json - name: Build Image working-directory: ./podman-desktop-extension-ai-lab id: build-image run: | pnpm build podman build -t local_ai_lab_image ./ CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "") mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/ podman rm -f $CONTAINER_ID podman rmi -f localhost/local_ai_lab_image:latest - name: Free up disk space uses: podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077 - name: Run E2E tests working-directory: ./podman-desktop-extension-ai-lab env: PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop EXTENSION_PREINSTALLED: true run: pnpm test:e2e - name: Publish Test Report uses: mikepenz/action-junit-report@v6 if: always() with: annotate_only: true fail_on_failure: true include_passed: true detailed_summary: true require_tests: true report_paths: '**/*results.xml' - uses: actions/upload-artifact@v7 if: always() with: name: e2e-check path: | ./**/tests/**/output/ !./**/*.gguf !./**/*.bin !./**/output/videos/* !./**/output/traces/* - name: Upload test videos uses: actions/upload-artifact@v7 if: always() with: name: e2e-check-videos path: ./**/output/videos/* - name: Upload test traces uses: actions/upload-artifact@v7 if: always() with: name: e2e-check-traces path: ./**/output/traces/* ================================================ FILE: 
.github/workflows/recipe-catalog-change-cleanup.yaml ================================================ name: recipe-catalog-change-cleanup on: workflow_run: workflows: ["recipe-catalog-change-windows-trigger"] types: - completed jobs: extract-context: runs-on: ubuntu-24.04 outputs: extract-context: ${{ steps.prepare-context.outputs.extract-context }} trigger-template: ${{ steps.prepare-context.outputs.trigger-template }} steps: - name: Prepare context id: prepare-context env: WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | echo "Workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}" echo "Fork owner: ${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}" echo "Fork repo: ${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}" echo "Fork branch: ${{ fromJson(env.WORKFLOW_RUN).head_branch }}" echo "Commit SHA: ${{ fromJson(env.WORKFLOW_RUN).head_sha }}" echo "Base repo: ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" echo "Conclusion: ${{ fromJson(env.WORKFLOW_RUN).conclusion }}" # Fetch job conclusions using the GitHub CLI echo "Fetching jobs for workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}" gh api \ repos/${{ github.repository }}/actions/runs/${{ fromJson(env.WORKFLOW_RUN).id }}/jobs \ --jq '.jobs[] | "\(.name)=\(.conclusion)"' | while read -r line; do echo "$line" >> $GITHUB_OUTPUT done cat $GITHUB_OUTPUT cleanup: runs-on: ubuntu-24.04 needs: extract-context if: ${{ github.event.workflow_run.conclusion == 'skipped' || (github.event.workflow_run.conclusion == 'success' && needs.extract-context.outputs.trigger-template == 'skipped') }} steps: - name: Remove skipped or cancelled workflow run env: WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | echo "Cleaning up workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}" gh run delete ${{ fromJson(env.WORKFLOW_RUN).id }} --repo ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }} echo "Workflow 
run ID ${{ fromJson(env.WORKFLOW_RUN).id }} has been cleaned up." ================================================ FILE: .github/workflows/recipe-catalog-change-template.yaml ================================================ name: Run recipe tests on catalog change on: workflow_call: inputs: trigger-workflow-run-id: required: true type: string trigger-workflow-fork: required: true type: string trigger-workflow-repo-name: required: true type: string trigger-workflow-branch: required: true type: string trigger-workflow-commit-sha: required: true type: string trigger-workflow-base-repo: required: true type: string pd-fork: required: false type: string pd-branch: required: false type: string pd-env-vars: required: false type: string podman-options: required: false type: string podman-download-url: required: false type: string ext_tests_options: required: false type: string npm-target: required: false type: string pde2e-image-version: required: false type: string mapt_params: required: false type: string jobs: windows: name: recipe-catalog-windows-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }} runs-on: ubuntu-24.04 strategy: fail-fast: false matrix: windows-version: ['11'] windows-featurepack: ['25h2-ent'] steps: - name: Add PR check status env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | status_context="catalog-change-windows-matrix-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}" echo "status_context=${status_context}" >> "$GITHUB_ENV" set -xuo # Status msg data="{\"state\":\"pending\"" data="${data},\"description\":\"Running recipe tests on catalog change on Windows ${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}\"" data="${data},\"context\":\"$status_context\"" data="${data},\"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}" # Create status by API call curl -L -v -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ 
https://api.github.com/repos/${{ inputs.trigger-workflow-base-repo }}/statuses/${{ inputs.trigger-workflow-commit-sha }} \ -d "${data}" - name: Get Podman version used by Podman Desktop run: | version=$(curl https://raw.githubusercontent.com/containers/podman-desktop/main/extensions/podman/packages/extension/src/podman5.json | jq -r '.version') echo "Default Podman Version from Podman Desktop: ${version}" echo "PD_PODMAN_VERSION=${version}" >> $GITHUB_ENV - name: Set the default env. variables env: DEFAULT_FORK: 'containers' DEFAULT_BRANCH: 'main' DEFAULT_NPM_TARGET: 'test:e2e' DEFAULT_ENV_VARS: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true' DEFAULT_PODMAN_OPTIONS: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0' DEFAULT_EXT_TESTS_OPTIONS: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0' DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main' DEFAULT_PODMAN_VERSION: "${{ env.PD_PODMAN_VERSION || '5.3.2' }}" DEFAULT_URL: "https://github.com/containers/podman/releases/download/v$DEFAULT_PODMAN_VERSION/podman-$DEFAULT_PODMAN_VERSION-setup.exe" DEFAULT_PDE2E_IMAGE_VERSION: 'v0.0.3-windows' DEFAULT_MAPT_PARAMS: "IMAGE=${{ vars.MAPT_IMAGE || 'quay.io/redhat-developer/mapt' }};VERSION_TAG=${{ vars.MAPT_VERSION_TAG || 'v0.9.7' }};CPUS=${{ vars.MAPT_CPUS || '4' }};MEMORY=${{ vars.MAPT_MEMORY || '32' }};EXCLUDED_REGIONS=\"${{ vars.MAPT_EXCLUDED_REGIONS || 'westindia,centralindia,southindia,australiacentral,australiacentral2,australiaeast,australiasoutheast,southafricanorth,southafricawest' }}\"" run: | echo "FORK=${{ inputs.pd-fork || env.DEFAULT_FORK }}" >> $GITHUB_ENV echo "BRANCH=${{ inputs.pd-branch || env.DEFAULT_BRANCH }}" >> $GITHUB_ENV echo "NPM_TARGET=${{ inputs.npm-target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV echo "ENV_VARS=${{ inputs.pd-env-vars || env.DEFAULT_ENV_VARS }}" >> $GITHUB_ENV echo "PODMAN_URL=${{ inputs.podman-download-url || env.DEFAULT_URL }}" >> $GITHUB_ENV echo 
"PDE2E_IMAGE_VERSION=${{ inputs.pde2e-image-version || env.DEFAULT_PDE2E_IMAGE_VERSION }}" >> $GITHUB_ENV if [[ -z "${{ inputs.trigger-workflow-repo-name }}" ]] && [[ -z "${{ inputs.trigger-workflow-fork }}" ]] && [[ -z "${{ inputs.trigger-workflow-branch }}" ]]; then echo "DEFAULT_EXT_REPO_OPTIONS=REPO=${{ inputs.trigger-workflow-repo-name }},FORK=${{ inputs.trigger-workflow-fork }},BRANCH=${{ inputs.trigger-workflow-branch }}" >> $GITHUB_ENV fi echo "${{ github.event.inputs.ext_tests_options || env.DEFAULT_EXT_TESTS_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ env.DEFAULT_PODMAN_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PODMAN_"kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ inputs.podman-options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV echo "${{ github.event.inputs.mapt_params || env.DEFAULT_MAPT_PARAMS }}" | awk -F ';' \ '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "MAPT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV - name: Create instance run: | # Create instance podman run -d --name windows-create --rm \ -v ${PWD}:/workspace:z \ -e ARM_TENANT_ID=${{ secrets.ARM_TENANT_ID }} \ -e ARM_SUBSCRIPTION_ID=${{ secrets.ARM_SUBSCRIPTION_ID }} \ -e ARM_CLIENT_ID=${{ secrets.ARM_CLIENT_ID }} \ -e ARM_CLIENT_SECRET='${{ secrets.ARM_CLIENT_SECRET }}' \ --user 0 \ ${{ env.MAPT_IMAGE }}:${{ env.MAPT_VERSION_TAG }} azure \ windows create \ --project-name 'windows-desktop' \ --backed-url 'file:///workspace' \ --conn-details-output '/workspace' \ --windows-version '${{ matrix.windows-version }}' \ --windows-featurepack '${{ matrix.windows-featurepack }}' \ --cpus ${{ env.MAPT_CPUS }} \ --memory ${{ env.MAPT_MEMORY }} \ --nested-virt \ --tags project=podman-desktop \ --spot-excluded-regions ${{ env.MAPT_EXCLUDED_REGIONS }} \ --spot # Check logs podman logs -f windows-create - name: 
Check instance system info run: | ssh -i id_rsa \ -o StrictHostKeyChecking=no \ -o UserKnownHostsFile=/dev/null \ -o ServerAliveInterval=30 \ -o ServerAliveCountMax=1200 \ $(cat username)@$(cat host) "systeminfo" - name: Emulate X session run: | # use fake rdp to emulate an active x session podman run -d --name x-session \ -e RDP_HOST=$(cat host) \ -e RDP_USER=$(cat username) \ -e RDP_PASSWORD=$(cat userpassword) \ quay.io/rhqp/frdp:v0.0.1 # Wait until the x session has been created podman wait --condition running x-session # Check logs for the x session podman logs x-session - name: Download Podman, do not initialize run: | podman run --rm -d --name pde2e-podman-run \ -e TARGET_HOST=$(cat host) \ -e TARGET_HOST_USERNAME=$(cat username) \ -e TARGET_HOST_KEY_PATH=/data/id_rsa \ -e TARGET_FOLDER=pd-e2e \ -e TARGET_CLEANUP=false \ -e TARGET_RESULTS=results \ -e OUTPUT_FOLDER=/data \ -e DEBUG=true \ -v $PWD:/data:z \ quay.io/odockal/pde2e-podman:${{ env.PDE2E_IMAGE_VERSION }} \ pd-e2e/podman.ps1 \ -downloadUrl ${{ env.PODMAN_URL }} \ -targetFolder pd-e2e \ -resultsFolder results \ -initialize 0 \ -rootful 0 \ -start 0 \ -installWSL 0 # check logs podman logs -f pde2e-podman-run - name: Build Podman Desktop Electron Inspect Enabled binary run: | podman run --rm -d --name pde2e-builder-run \ -e TARGET_HOST=$(cat host) \ -e TARGET_HOST_USERNAME=$(cat username) \ -e TARGET_HOST_KEY_PATH=/data/id_rsa \ -e TARGET_FOLDER=pd-e2e \ -e TARGET_CLEANUP=false \ -e TARGET_RESULTS=results \ -e OUTPUT_FOLDER=/data \ -e DEBUG=true \ -v $PWD:/data:z \ quay.io/odockal/pde2e-builder:${{ env.PDE2E_IMAGE_VERSION }} \ pd-e2e/builder.ps1 \ -targetFolder pd-e2e \ -resultsFolder results \ -fork ${{ env.FORK }} \ -branch ${{ env.BRANCH }} \ -envVars ${{ env.ENV_VARS }} # check logs podman logs -f pde2e-builder-run - name: Run Podman Desktop Playwright E2E tests run: | podman run -d --name pde2e-runner-run \ -e TARGET_HOST=$(cat host) \ -e TARGET_HOST_USERNAME=$(cat username) \ -e 
TARGET_HOST_KEY_PATH=/data/id_rsa \ -e TARGET_FOLDER=pd-e2e \ -e TARGET_RESULTS=results \ -e OUTPUT_FOLDER=/data \ -e DEBUG=true \ -v $PWD:/data:z \ quay.io/odockal/pde2e-runner:${{ env.PDE2E_IMAGE_VERSION }} \ pd-e2e/runner.ps1 \ -targetFolder pd-e2e \ -resultsFolder results \ -podmanPath $(cat results/podman-location.log) \ -pdPath "$(cat results/pde2e-binary-path.log | tr '\n' " ")" \ -fork ${{ env.FORK }} \ -branch ${{ env.BRANCH }} \ -extRepo ${{ env.EXT_REPO }} \ -extFork ${{ env.EXT_FORK }} \ -extBranch ${{ env.EXT_BRANCH }} \ -extTests ${{ env.EXT_RUN_TESTS_FROM_EXTENSION }} \ -npmTarget ${{ env.NPM_TARGET }} \ -initialize ${{ env.PODMAN_INIT }} \ -rootful ${{ env.PODMAN_ROOTFUL }} \ -start ${{ env.PODMAN_START }} \ -userNetworking ${{ env.PODMAN_NETWORKING }} \ -envVars ${{ env.ENV_VARS }} \ -runAsAdmin ${{ env.EXT_RUN_TESTS_AS_ADMIN }} # check logs podman logs -f pde2e-runner-run - name: Publish Test Report id: test-report uses: mikepenz/action-junit-report@v6 if: always() # always run even if the previous step fails with: annotate_only: true fail_on_failure: true include_passed: true detailed_summary: true require_tests: true report_paths: '**/*results.xml' - name: Update status of the PR check if: always() env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | set -xuo # Status msg data="{\"state\":\"success\"" if [[ ${{ steps.test-report.outcome }} != "success" ]]; then data="{\"state\":\"failure\"" fi data="${data},\"description\":\"Finished recipe tests on catalog change on Windows ${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}\"" data="${data},\"context\":\"${{ env.status_context }}\"" data="${data},\"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}" # Create status by API call curl -L -v -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${{ env.GH_TOKEN }}" \ https://api.github.com/repos/${{ inputs.trigger-workflow-base-repo }}/statuses/${{ 
inputs.trigger-workflow-commit-sha }} \ -d "${data}" - name: Destroy instance if: always() run: | # Destroy instance podman run -d --name windows-destroy --rm \ -v ${PWD}:/workspace:z \ -e ARM_TENANT_ID=${{ secrets.ARM_TENANT_ID }} \ -e ARM_SUBSCRIPTION_ID=${{ secrets.ARM_SUBSCRIPTION_ID }} \ -e ARM_CLIENT_ID=${{ secrets.ARM_CLIENT_ID }} \ -e ARM_CLIENT_SECRET='${{ secrets.ARM_CLIENT_SECRET }}' \ --user 0 \ ${{ env.MAPT_IMAGE }}:${{ env.MAPT_VERSION_TAG }} azure \ windows destroy \ --project-name 'windows-desktop' \ --backed-url 'file:///workspace' # Check logs podman logs -f windows-destroy - name: Upload test artifacts uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }} path: | results/* !./**/*.gguf !./**/*.bin !./**/output/videos/* !./**/output/traces/* - name: Upload test videos uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-videos path: ./**/output/videos/* - name: Upload test traces uses: actions/upload-artifact@v7 if: always() with: name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-traces path: ./**/output/traces/* ================================================ FILE: .github/workflows/recipe-catalog-change-trigger.yaml ================================================ name: recipe-catalog-change-windows-trigger on: workflow_run: workflows: ["pr-check"] types: - completed jobs: extract-context: runs-on: ubuntu-24.04 if: ${{ github.event.workflow_run.conclusion == 'success' }} outputs: workflow-run-id: ${{ steps.parse-event.outputs.workflow-run-id }} fork-owner: ${{ steps.parse-event.outputs.fork-owner }} fork-repo: ${{ steps.parse-event.outputs.fork-repo }} fork-branch: ${{ steps.parse-event.outputs.fork-branch }} commit-sha: ${{ steps.parse-event.outputs.commit-sha }} base-repo: ${{ steps.parse-event.outputs.base-repo }} changes-detected: ${{ 
steps.parse-event.outputs.changes-detected }} steps: - name: Parse event data id: parse-event env: WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }} GH_TOKEN: ${{ github.token }} run: | echo "Workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}" echo "workflow-run-id=${{ fromJson(env.WORKFLOW_RUN).id }}" >> $GITHUB_OUTPUT echo "Fork owner: ${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}" echo "fork-owner=${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}" >> $GITHUB_OUTPUT echo "Fork repo: ${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}" echo "fork-repo=${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}" >> $GITHUB_OUTPUT echo "Fork branch: ${{ fromJson(env.WORKFLOW_RUN).head_branch }}" echo "fork-branch=${{ fromJson(env.WORKFLOW_RUN).head_branch }}" >> $GITHUB_OUTPUT echo "Commit SHA: ${{ fromJson(env.WORKFLOW_RUN).head_sha }}" echo "commit-sha=${{ fromJson(env.WORKFLOW_RUN).head_sha }}" >> $GITHUB_OUTPUT echo "Base repo: ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" echo "base-repo=${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" >> $GITHUB_OUTPUT git clone "https://www.github.com/${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" "${{ fromJson(env.WORKFLOW_RUN).repository.name }}" --depth 1 cd "${{ fromJson(env.WORKFLOW_RUN).repository.name }}" git remote add upstream "https://www.github.com/${{ fromJson(env.WORKFLOW_RUN).head_repository.full_name }}" git fetch upstream git diff --name-only upstream/${{ fromJson(env.WORKFLOW_RUN).head_branch }} HEAD > changes.txt if grep -qe 'packages/backend/src/assets/ai.json' changes.txt; then echo "Changes detected in ai.json" echo "changes-detected=true" >> $GITHUB_OUTPUT else echo "No changes detected in ai.json" echo "changes-detected=false" >> $GITHUB_OUTPUT fi trigger-template: needs: extract-context uses: containers/podman-desktop-extension-ai-lab/.github/workflows/recipe-catalog-change-template.yaml@main if: ${{ 
needs.extract-context.outputs.changes-detected == 'true' }} strategy: fail-fast: false with: trigger-workflow-run-id: ${{ needs.extract-context.outputs.workflow-run-id }} trigger-workflow-fork: ${{ needs.extract-context.outputs.fork-owner }} trigger-workflow-repo-name: ${{ needs.extract-context.outputs.fork-repo }} trigger-workflow-branch: ${{ needs.extract-context.outputs.fork-branch }} trigger-workflow-commit-sha: ${{ needs.extract-context.outputs.commit-sha }} trigger-workflow-base-repo: ${{ needs.extract-context.outputs.base-repo }} ext_tests_options: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=0,EXT_TEST_GPU_SUPPORT_ENABLED=0' secrets: inherit ================================================ FILE: .github/workflows/release.yaml ================================================ # # Copyright (C) 2024-2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 name: release on: workflow_dispatch: inputs: version: description: 'Version to release' required: true branch: description: 'Branch to use for the release' required: true default: main env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} jobs: tag: name: Tagging runs-on: ubuntu-24.04 outputs: githubTag: ${{ steps.TAG_UTIL.outputs.githubTag}} extVersion: ${{ steps.TAG_UTIL.outputs.extVersion}} releaseId: ${{ steps.create_release.outputs.id}} steps: - uses: actions/checkout@v6.0.2 with: ref: ${{ github.event.inputs.branch }} - name: Generate tag utilities id: TAG_UTIL run: | TAG_PATTERN=${{ github.event.inputs.version }} echo "githubTag=v$TAG_PATTERN" >> ${GITHUB_OUTPUT} echo "extVersion=$TAG_PATTERN" >> ${GITHUB_OUTPUT} - name: tag run: | git config --local user.name ${{ github.actor }} # Add the new version in package.json file sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" packages/backend/package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" packages/frontend/package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" tests/playwright/package.json git add package.json git add packages/backend/package.json git add packages/frontend/package.json git add tests/playwright/package.json # commit the changes git commit -m "chore: 🥁 tagging ${{ steps.TAG_UTIL.outputs.githubTag }} 🥳" echo "Tagging with ${{ steps.TAG_UTIL.outputs.githubTag }}" git tag ${{ steps.TAG_UTIL.outputs.githubTag }} git push origin ${{ steps.TAG_UTIL.outputs.githubTag }} - name: Create Release id: create_release uses: ncipollo/release-action@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag: ${{ steps.TAG_UTIL.outputs.githubTag }} name: ${{ steps.TAG_UTIL.outputs.githubTag }} draft: true prerelease: 
false - name: Create the PR to bump the version in the main branch (only if we're tagging from main branch) if: ${{ github.event.inputs.branch == 'main' }} run: | git config --local user.name ${{ github.actor }} CURRENT_VERSION=$(echo "${{ steps.TAG_UTIL.outputs.extVersion }}") tmp=${CURRENT_VERSION%.*} minor=${tmp#*.} bumpedVersion=${CURRENT_VERSION%%.*}.$((minor + 1)).0 bumpedBranchName="bump-to-${bumpedVersion}" git checkout -b "${bumpedBranchName}" sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" packages/backend/package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" packages/frontend/package.json sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" tests/playwright/package.json git add package.json git add packages/backend/package.json git add packages/frontend/package.json git add tests/playwright/package.json git commit -s --amend -m "chore: bump version to ${bumpedVersion}" git push origin "${bumpedBranchName}" echo -e "📢 Bump version to ${bumpedVersion}\n\n${{ steps.TAG_UTIL.outputs.extVersion }} has been released.\n\n Time to switch to the new ${bumpedVersion} version 🥳" > /tmp/pr-title pullRequestUrl=$(gh pr create --title "chore: 📢 Bump version to ${bumpedVersion}" --body-file /tmp/pr-title --head "${bumpedBranchName}" --base "main") echo "📢 Pull request created: ${pullRequestUrl}" echo "➡️ Flag the PR as being ready for review" gh pr ready "${pullRequestUrl}" echo "🔅 Mark the PR as being ok to be merged automatically" gh pr merge "${pullRequestUrl}" --auto --rebase git checkout ${{ steps.TAG_UTIL.outputs.githubTag }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} build: needs: [tag] runs-on: ubuntu-latest steps: - uses: actions/checkout@v6.0.2 with: ref: ${{ needs.tag.outputs.githubTag }} - uses: pnpm/action-setup@v5 name: Install pnpm with: run_install: false - uses: 
actions/setup-node@v6 with: node-version: 24 cache: 'pnpm' - name: Execute yarn run: pnpm install - name: Run Build run: pnpm build - name: Login to ghcr.io run: podman login --username ${{ github.repository_owner }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io - name: Build Image id: build-image run: | podman build -t ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }} . podman push ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }} podman tag ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }} ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:latest podman push ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:latest release: needs: [tag, build] name: Release runs-on: ubuntu-24.04 steps: - name: id run: echo the release id is ${{ needs.tag.outputs.releaseId}} - name: Publish release uses: StuYarrow/publish-release@v1.1.2 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: id: ${{ needs.tag.outputs.releaseId}} ================================================ FILE: .github/workflows/update-ramalama-references.sh ================================================ #!/usr/bin/env bash # # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 # Script to update ramalama image references in inference-images.json set -euo pipefail JSON_PATH="packages/backend/src/assets/inference-images.json" TMP_JSON="${JSON_PATH}.tmp" TAG=$1 # Images and their keys in the JSON IMAGES=( "whispercpp:ramalama/ramalama-whisper-server:default" "llamacpp:ramalama/ramalama-llama-server:default" "llamacpp:ramalama/cuda-llama-server:cuda" "openvino:ramalama/openvino:default" ) cp "$JSON_PATH" "$TMP_JSON" for entry in "${IMAGES[@]}"; do IFS=":" read -r key image jsonkey <<< "$entry" digest=$(curl -s "https://quay.io/v2/$image/manifests/$TAG" -H 'Accept: application/vnd.oci.image.index.v1+json' --head | grep -i Docker-Content-Digest | awk -e '{ print $2 }' | tr -d '\r') # Update the JSON file with the new digest jq --arg img "quay.io/$image" --arg dig "$digest" --arg key "$key" --arg jsonkey "$jsonkey" \ '(.[$key][$jsonkey]) = ($img + "@" + $dig)' \ "$TMP_JSON" > "$TMP_JSON.new" && mv "$TMP_JSON.new" "$TMP_JSON" done # Compare and update if changed if cmp -s "$JSON_PATH" "$TMP_JSON"; then echo "No update needed: digests are up to date." rm "$TMP_JSON" exit 0 else mv "$TMP_JSON" "$JSON_PATH" echo "Updated inference-images.json with latest digests." exit 10 fi ================================================ FILE: .github/workflows/update-ramalama-references.yaml ================================================ # # Copyright (C) 2025 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 # This workflow automatically updates ramalama image digests in inference-images.json # and creates a pull request with the changes. name: update-ramalama-references on: schedule: - cron: '0 3 * * *' # Runs daily at 03:00 UTC workflow_dispatch: permissions: contents: write jobs: update-references: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 # v5.0.1 - name: Get latest ramalama version id: get_ramalama_version run: | RAMALAMA_VERSION=$(curl -s https://quay.io/v2/ramalama/ramalama-llama-server/tags/list -s | jq .tags[] | grep -E '^"[0-9]+\.[0-9]+\.[0-9]+"$' | sort -V | tail -n 1 | tr -d '"') echo "RAMALAMA_VERSION=${RAMALAMA_VERSION}" >> $GITHUB_OUTPUT - name: Check if PR already exists id: pr_exists uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 with: script: | const branch = `update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}`; const { data: pulls } = await github.rest.pulls.list({ owner: context.repo.owner, repo: context.repo.repo, head: `${context.repo.owner}:${branch}`, state: 'open', }); if (pulls.length > 0) { core.setOutput('exists', 'true'); } else { core.setOutput('exists', 'false'); } - name: Update ramalama image references in inference-images.json id: update_digests if: steps.pr_exists.outputs.exists == 'false' run: | bash .github/workflows/update-ramalama-references.sh "${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" continue-on-error: true - name: Commit changes if: steps.pr_exists.outputs.exists == 'false' && steps.update_digests.outcome == 'failure' run: | git config --global user.email "github-actions[bot]@users.noreply.github.com" git config --global user.name "github-actions[bot]" git checkout -b "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" git add packages/backend/src/assets/inference-images.json git commit -m "chore: update ramalama 
image references ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" git push origin "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" - name: Create Pull Request if: steps.pr_exists.outputs.exists == 'false' && steps.update_digests.outcome == 'failure' run: | echo -e "update ramalama image references to ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" > /tmp/pr-title pullRequestUrl=$(gh pr create --title "chore: update ramalama image references to ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" --body-file /tmp/pr-title --head "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" --base "main") echo "📢 Pull request created: ${pullRequestUrl}" echo "➡️ Flag the PR as being ready for review" gh pr ready "${pullRequestUrl}" env: GITHUB_TOKEN: ${{ secrets.PODMAN_DESKTOP_BOT_TOKEN }} ================================================ FILE: .gitignore ================================================ node_modules .DS_Store dist .eslintcache **/coverage .idea output ================================================ FILE: .husky/commit-msg ================================================ #!/bin/sh # # Copyright (C) 2024 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 set -u # avoid [[ which is not POSIX sh. if test "$#" != 1; then echo "$0 requires an argument." exit 1 fi if test ! 
-f "$1"; then echo "file does not exist: $1" exit 1 fi pnpm commitlint --edit "$1" SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') grep -qs "^$SOB" "$1" || echo "$SOB" >>"$1" # Catches duplicate Signed-off-by lines. test "" = "$(grep '^Signed-off-by: ' "$1" | sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { echo >&2 Duplicate Signed-off-by lines. exit 1 } ================================================ FILE: .husky/pre-commit ================================================ # # Copyright (C) 2024 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 pnpm lint-staged ================================================ FILE: .npmrc ================================================ node-linker=hoisted ================================================ FILE: .prettierrc ================================================ { "svelteSortOrder" : "options-styles-scripts-markup", "svelteStrictMode": true, "svelteAllowShorthand": false, "svelteIndentScriptAndStyle": false, "bracketSameLine": true, "singleQuote": true, "arrowParens": "avoid", "printWidth": 120, "trailingComma": "all", "plugins": ["prettier-plugin-svelte"] } ================================================ FILE: .vscode/settings.json ================================================ { "typescript.preferences.importModuleSpecifier": "non-relative" } ================================================ FILE: CODE-OF-CONDUCT.md ================================================ Podman Desktop Extension AI Lab Project Community Code of Conduct The Podman Desktop Extension AI Lab Project follows the [Containers Community Code of Conduct](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md). ================================================ FILE: Containerfile ================================================ # # Copyright (C) 2024 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # SPDX-License-Identifier: Apache-2.0 FROM scratch as builder COPY packages/backend/dist/ /extension/dist COPY packages/backend/package.json /extension/ COPY packages/backend/media/ /extension/media COPY LICENSE /extension/ COPY packages/backend/icon.png /extension/ COPY packages/backend/brain.woff2 /extension/ COPY README.md /extension/ COPY api/openapi.yaml /extension/api/ FROM scratch LABEL org.opencontainers.image.title="AI Lab" \ org.opencontainers.image.description="AI Lab" \ org.opencontainers.image.vendor="Red Hat" \ io.podman-desktop.api.version=">= 1.8.0" COPY --from=builder /extension /extension ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MIGRATION.md ================================================ # Migration guide ## ℹ️ ApplicationCatalog Before **Podman AI Lab** `v1.2.0` the [user-catalog](./PACKAGING-GUIDE.md#applicationcatalog) was not versioned. Starting from `v1.2.0` the user-catalog requires a `version` property. > [!NOTE] > The `user-catalog.json` file can be found in `~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab`. The list of catalog versions can be found in [packages/backend/src/utils/catalogUtils.ts](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/utils/catalogUtils.ts) The catalog has its own version number, as we may not need to update it with every update. It will follow the semantic versioning convention. ## `None` to Catalog `1.0` `None` represents any catalog version prior to the first versioning. Version `1.0` of the catalog adds an important property to models `backend`, defining the type of framework required by the model to run (E.g. LLamaCPP, WhisperCPP). ### 🛠️ How to migrate You can either delete any existing `user-catalog` by deleting the `~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json`. > [!WARNING] > This will remove the models you have imported from the catalog. You will be able to import them again afterward. 
If you want to keep the data, you can migrate it by updating certain properties within the recipes and models fields. ### Recipes The recipe object has a new property `backend` which defines which framework is required. Value accepted are `llama-cpp`, `whisper-cpp` and `none`. Moreover, the `models` property has been changed to `recommended`. > [!TIP] > Before Podman AI Lab version v1.2 recipes uses the `models` property to list the models compatible. Now all models using the same `backend` could be used. We introduced `recommended` to highlight certain models. **Example** ```diff { "version": "1.0", "recipes": [{ "id": "chatbot", "description" : "This is a Streamlit chat demo application.", "name" : "ChatBot", "repository": "https://github.com/containers/ai-lab-recipes", - "models": [ + "recommended": [ "hf.instructlab.granite-7b-lab-GGUF", "hf.instructlab.merlinite-7b-lab-GGUF" ] + "backend": "llama-cpp" }], "models": [], "categories": [] } ``` ### Models The model object has also the new property `backend`, which defines which framework is required. Additionally, we have enhanced security by introducing a new optional `sha256` property. > [!TIP] > To get the sha256 of a model, you can use the `sha256sum [model-file]` command in a terminal. 
**Example** ```diff { "version": "1.0", "recipes": [], "models": [{ "id": "hf.instructlab.granite-7b-lab-GGUF", "name": "instructlab/granite-7b-lab-GGUF", "description": "# InstructLab Granite 7B", "hw": "CPU", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf", "memory": 4080218931, "properties": { "chatFormat": "openchat" }, + "sha256": "6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487", + "backend": "llama-cpp" }], "categories": [] } ``` ================================================ FILE: PACKAGING-GUIDE.md ================================================ # Packaging guide ## ApplicationCatalog AI Lab uses an internal catalog embedded within the application. This catalog is loaded by AI Lab and displayed when you access the catalog page. The format of the catalog is JSON. It is possible for users to have a custom version of the catalog. In order to do so, copy the file located at https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json to $HOME/podman-desktop/ai-lab/catalog.json and AI Lab will use it instead of the embedded one. Any change done to this file will also be automatically loaded by AI Lab. ### Format of the catalog file The catalog file has three main elements: categories, models and recipes. Each of these elements is represented in the JSON file as an array. The catalog is `versioned`. Current version can be found in [ai.json](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json#L2). > :warning: when the version of the catalog is undefined or different from the current, the user-catalog will be ignored. #### Categories This is the top level construct of the catalog UI. Recipes are grouped into categories. A category represents the kind of AI application. 
Although the list of categories provided by default by AI Lab represents the AI landscape, it is possible to add new categories. A category has three main attributes: an id (which should be unique among categories), a description and a name. The category id attribute will then be used to attach a recipe to one or several categories. #### Models The catalog also lists the models that may be associated with recipes. A model is also a first class citizen in AI Lab as it will be listed in the Models page and can be tested through the playground. A model has the following attributes: - ```id```: a unique identifier for the model - ```name```: the model name - ```description```: a detailed description about the model - ```registry```: the model registry where the model is stored - ```popularity```: an integer field giving the rating of the model. Can be thought of as the number of stars - ```license```: the license under which the model is available - ```url```: the URL used to download the model - ```memory```: the memory footprint of the model in bytes, as computed by the workflow `.github/workflows/compute-model-sizes.yml` - ```sha256```: the SHA-256 checksum to be used to verify the downloaded model is identical to the original. It is optional and it must be HEX encoded #### Recipes A recipe is a sample AI application that is packaged as one or several containers. It is built by AI Lab when the user chooses to download and run it on their workstation. It is provided as source code and AI Lab will make sure the container images are built prior to launching the containers. A recipe has the following attributes: - ```id```: a unique identifier for the recipe - ```name```: the recipe name - ```description```: a detailed description about the recipe - ```repository```: the URL where the recipe code can be retrieved - ```ref```: an optional ref in the repository to checkout (a branch name, tag name, or commit full id - short commit id won't be recognized). 
If not defined, the default branch will be used - ```categories```: an array of category id to be associated with this recipe - ```basedir```: an optional path within the repository where the ai-lab.yaml file is located. If not provided, the ai-lab.yaml is assumed to be located at the root of the repository - ```readme```: a markdown description of the recipe - ```models```: an array of model id to be associated with this recipe #### Recipe configuration file The configuration file is called ```ai-lab.yaml``` and follows the following syntax. The root elements are called ```version``` and ```application```. ```version``` represents the version of the specifications that ai-lab adheres to (so far, the only accepted value here is `v1.0`). ```application``` contains an attribute called ```containers``` whose syntax is an array of objects containing the following attributes: - ```name```: the name of the container - ```contextdir```: the context directory used to build the container. - ```containerfile```: the containerfile used to build the image - ```model-service```: a boolean flag used to indicate if the container is running the model or not - ```arch```: an optional array of architectures with which this image is compatible. The values follow the [GOARCH specification](https://go.dev/src/go/build/syslist.go) - ```gpu-env```: an optional array of GPU environments with which this image is compatible. The only accepted value here is cuda. - ```ports```: an optional array of ports on which the application listens. - `image`: an optional image name to be used when building the container image. The container that is running the service (having the ```model-service``` flag equal to ```true```) can use at runtime the model managed by AI Lab through an environment variable ```MODEL_PATH``` whose value is the full path name of the model file. 
Below is given an example of such a configuration file: ```yaml application: containers: - name: chatbot-inference-app contextdir: ai_applications containerfile: builds/Containerfile - name: chatbot-model-service contextdir: model_services containerfile: base/Containerfile model-service: true arch: - arm64 - amd64 ports: - 8001 image: quay.io/redhat-et/chatbot-model-service:latest - name: chatbot-model-servicecuda contextdir: model_services containerfile: cuda/Containerfile model-service: true gpu-env: - cuda arch: - amd64 ports: - 8501 image: quay.io/redhat-et/model_services:latest ``` ================================================ FILE: README.md ================================================ # Podman AI Lab Podman AI Lab is an open source extension for Podman Desktop to work with LLMs (Large Language Models) on a local environment. Featuring a recipe catalog with common AI use cases, a curated set of open source models, and a playground for learning, prototyping and experimentation, Podman AI Lab helps you to quickly and easily get started bringing AI into your applications, without depending on infrastructure beyond your laptop ensuring data privacy and security. ## Topics - [Technology](#technology) - [Extension features](#extension-features) - [Requirements](#requirements) - [Installation](#installation) - [Usage](#usage) - [Contributing](#contributing) - [Feedback](#feedback) ## Technology Podman AI Lab uses [Podman](https://podman.io) machines to run inference servers for LLM models and AI applications. The AI models can be downloaded, and common formats like [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md), [Pytorch](https://pytorch.org) or [Tensorflow](https://www.tensorflow.org) are supported. ## Extension features ### AI models Podman AI Lab provides a curated list of open source AI models and LLMs. Once downloaded, the models are available to be used for AI applications, model services and playgrounds. 
#### Model services Once a model is downloaded, a model service can be started. A model service is an inference server that is running in a container and exposing the model through the well-known chat API common to many providers. #### Playgrounds The integrated Playground environments allow for experimenting with available models in a local environment. An intuitive user prompt helps in exploring the capabilities and accuracy of various models and aids in finding the best model for the use case at hand. The Playground interface further allows for parameterizing models to further optimize the settings and attributes of each model. ### AI applications Once an AI model is available through a well-known endpoint, it's easy to imagine a new world of applications that will connect and use the AI model. Podman AI Lab supports AI applications as a set of containers that are connected together. Podman AI Lab ships with a so-called Recipes Catalog that helps you navigate a number of core AI use cases and problem domains such as Chat Bots, Code Generators and Text Summarizers. Each recipe comes with detailed explanations and sample applications that can be run with various large language models (LLMs). Experimenting with multiple models allows finding the optimal one for your use case. ## Requirements ### Software - [Podman Desktop 1.8.0+](https://github.com/containers/podman-desktop) - [Podman 4.9.0+](https://github.com/containers/podman) - Compatible with Windows, macOS & Linux ### Hardware LLMs AI models are heavy resource consumers both in terms of memory and CPU. Each of the provided models consumes about 4GiB of memory and requires at least 4 CPUs to run. We recommend a minimum of 12GB of memory and at least 4 CPUs for the Podman machine. On Windows, the podman machine shares memory and CPU with all the Windows Subsystem for Linux (WSL) machines. By default, WSL is set to 50% of total memory and all logical processors. 
This can be changed in the WSL Settings (See [WSL Config](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#wslconfig)). As an additional recommended practice, do not run more than 3 models simultaneously. ## Installation You can install the Podman AI Lab extension directly inside Podman Desktop. Go to Extensions > Catalog > Install Podman AI Lab. ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/install_ai_lab.gif) To install a development version, use the `Install custom...` action as shown in the recording below. The name of the image to use is `ghcr.io/containers/podman-desktop-extension-ai-lab`. You can get released tags for the image at https://github.com/containers/podman-desktop-extension-ai-lab/pkgs/container/podman-desktop-extension-ai-lab. ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/install_development_version.gif) ## Usage 1. **Download a model** Let's select a model from the catalog and download it locally to our workstation. ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/download-model.gif) 2. **Start an inference server** Once a model is available locally, let's start an inference server ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/start-inference-server.gif) 3. **Start a playground to have a chat conversation with model** ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/playground.gif) 4. **Start an AI application and use it from the browser** ![](https://github.com/containers/podman-desktop-media/raw/ai-lab/gifs/start-ai-app.gif) ## Contributing Want to help develop and contribute to Podman AI Lab? You can use `pnpm watch --extension-folder` from the Podman Desktop directory to automatically rebuild and test the AI Lab extension: > **_Note_**: make sure you have the appropriate [prerequisites](https://github.com/containers/podman-desktop/blob/main/CONTRIBUTING.md#prerequisites-prepare-your-environment) installed. 
```sh git clone https://github.com/containers/podman-desktop git clone https://github.com/containers/podman-desktop-extension-ai-lab cd podman-desktop-extension-ai-lab corepack enable pnpm pnpm install pnpm build cd ../podman-desktop pnpm watch --extension-folder ../podman-desktop-extension-ai-lab/packages/backend ``` If you are live editing the frontend package, from packages/frontend folder: ``` $ pnpm watch ``` ### Cleaning up resources We'll be adding a way to let a user cleanup their environment: see issue https://github.com/containers/podman-desktop-extension-ai-lab/issues/469. For the time being, please consider the following actions: 1. Remove the extension from Podman Desktop, from the Settings > Extensions 2. Remove the running playground environments from the list of Pods 3. Remove the images built by the recipes 4. Remove the containers related to AI 5. Cleanup your local clone of the recipes: `$HOME/podman-desktop/ai-lab` ### 📖 Providing a custom catalog The extension provides by default a curated list of recipes, models and categories. However, this system is extensible and you can define your own. To enhance the existing catalog, you can create a file located in the extension storage folder `$HOME/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json`. It must follow the same format as the default catalog [in the sources of the extension](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json). > :information_source: The default behaviour is to append the items of the user's catalog to the default one. > :warning: Each item (recipes, models or categories) has a unique id, when conflict between the default catalog and the user one are found, the user's items overwrite the defaults. ### Packaging sample applications Sample applications may be added to the catalog. 
See [packaging guide](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/PACKAGING-GUIDE.md) for detailed information. ## Roadmap The roadmap is always open and we are looking for your feedback. Please create new issues and upvote on the issues that are feeling the most important for you. We will be working on the following items: - **Expanded Recipes**: Discover new use cases and samples to inspire and accelerate your applications. - **GPU Acceleration**: Speeding up processing times by leveraging GPU acceleration. - **API/CLI**: Interact with Podman AI Lab from CLI and APIs. - **Enhanced Playgrounds**: Streamlined workflows and UX giving a better space to experiment with LLMs and quickly iterate. - **Fine Tuning with [InstructLab](https://instructlab.ai/)**: Re-train LLMs with a set of taxonomy knowledges. Learn more about [the InstructLab project](https://github.com/instructlab). - **Enable Function Calling**: Use LLMs to retrieve or interact with external tools by doing API calls. - **Local RAG**: Explore RAG pattern, load your document and test behavior of the model. - **Bridge with AI Platforms (incl. K8s)**: Connect to remote models and ease deployment of applications. ## Feedback You can provide your feedback on the extension with [this form](https://forms.gle/tctQ4RtZSiMyQr3R8) or create [an issue on this repository](https://github.com/containers/podman-desktop-extension-ai-lab/issues). ================================================ FILE: RELEASE.md ================================================ # Release process for Podman AI Lab ## Pre-requisites - Create Enhancement Issue `Release vX.X.X` for current sprint, then update the label to `kind/release` and assign it to yourself. - Confirm with Podman Desktop maintainers that pending / need-to-go-in PR's have been merged. - Notify main contributors on Discord / Slack. In the below example, we will pretend that we're upgrading from `1.1.0` to `1.2.0`. 
Please use the CORRECT release numbers as these are just example numbers. ## Release timeline Below is what a typical release week may look like: - **Monday (Notify):** 48-hour notification. Communicate to maintainers and public channels a release will be cut on Wednesday and to merge any pending PRs. Inform QE team. Start work on blog post as it is usually the longest part of the release process. - **Tuesday (Staging, Testing & Blog):** Stage the release (see instructions below) to create a new cut of the release to test. Test the pre-release (master branch) build briefly. Get feedback from committers (if applicable). Push the blog post for review (as it usually takes a few back-and-forth reviews on documentation). - **Wednesday (Release):** Publish the new release on the catalog using the below release process. - **Thursday (Post-release Testing & Blog):** Test the post-release build briefly for any critical bugs. Confirm that new release has been pushed to the catalog. Push the blog post live. Get a known issues list together from QE and publish to the Podman Desktop Discussions, link to this from the release notes. - **Friday (Communicate):** Friday is statistically the best day for new announcements. Post on internal channels. Post on reddit, hackernews, twitter, etc. ## Releasing on GitHub 1. Go to https://github.com/containers/podman-desktop-extension-ai-lab/actions/workflows/release.yaml 1. Click on the top right drop-down menu `Run workflow` 1. Enter the name of the release. Example: `1.2.0` (DO NOT use the v prefix like v1.2.0) 1. Specify the branch to use for the new release. It's main for all major releases. For a bugfix release, you'll select a different branch. 1. Click on the `Run workflow` button. 1. Note: `Run workflow` takes approximately 2-3 minutes. 1. Close the milestone for the respective release, make sure that all tasks within the milestone are completed / updated before closing. 
https://github.com/containers/podman-desktop-extension-ai-lab/milestones 1. If not already created, click on `New Milestone` and create a new milestone for the NEXT release. 1. Check that https://github.com/containers/podman-desktop-extension-ai-lab/actions/workflows/release.yaml has been completed. 1. There should be an automated PR that has been created. This will be automatically merged in after all tests have been run (takes 5-10 minutes). The title looks like `chore: 📢 Bump version to 1.3.0`. Rerun workflow manually if some of the e2e tests are failing. 1. Above PR MUST be merged before continuing with the steps. 1. Edit the new release https://github.com/containers/podman-desktop-extension-ai-lab/releases/edit/v1.2.0 1. Select previous tag (v1.1.0) and click on `Generate release notes` and then click on `Update release` ## Test release before it is rolling out. The release is a pre-release, meaning it is not yet the latest version, so no clients will automatically update to this version. It allows QE (and everyone else) to test the release before it goes live on the catalog. ## Next phase - ❌ All severe bugs and regressions are investigated and discussed. If we agree any should block the release, need to fix the bugs and do a respin of the release with a new .z release like 1.2.1 instead of 1.2.0. Create a branch if it does not exist. For example 1.2.x if 1.2.0 failed. Then, cherry-pick bugfixes in that branch. - ✅ If committers agree we have a green light, proceed. **Do not forget to change the release from 'pre-release' to 'latest release' before proceeding**. ## Updating catalog Pre-requisites: - Ensure the release is OK (green workflow, image has been published https://github.com/containers/podman-desktop-extension-ai-lab/releases https://github.com/containers/podman-desktop-extension-ai-lab/pkgs/container/podman-desktop-extension-ai-lab). 
#### Catalog Create and submit a PR to the catalog (https://github.com/containers/podman-desktop-catalog on branch gh-pages). This is manual and will be automated in the future. ================================================ FILE: SECURITY.md ================================================ ## Security and Disclosure Information Policy for the Podman Desktop Extension AI Lab Project The Podman Desktop Extension AI Lab Project follows the [Security and Disclosure Information Policy](https://github.com/containers/common/blob/main/SECURITY.md) for the Containers Projects. ================================================ FILE: USAGE_DATA.md ================================================ # Data Collection The AI Lab extension uses telemetry to collect anonymous usage data in order to identify issues and improve our user experience. You can read our privacy statement [here](https://developers.redhat.com/article/tool-data-collection). Telemetry for the extension is based on the Podman Desktop telemetry. Users are prompted during Podman Desktop first startup to accept or decline telemetry. This setting can be changed at any time in Settings > Preferences > Telemetry. On disk the setting is stored in the `"telemetry.*"` keys within the settings file, at `$HOME/.local/share/containers/podman-desktop/configuration/settings.json`. A generated anonymous id is stored at `$HOME/.redhat/anonymousId`. ## What's included in the telemetry data - General information, including operating system, machine architecture, and country. - When the extension starts and stops. - When the icon to enter the extension zone is clicked. - When a recipe page is opened (with recipe Id and name). - When a sample application is pulled (with recipe Id and name). - When a playground is started or stopped (with model Id). - When a request is sent to a model in the playground (with model Id, **without** request content). - When a model is downloaded or deleted from disk. 
No personally identifiable information is captured. An anonymous id is used so that we can correlate the actions of a user even if we can't tell who they are. ================================================ FILE: api/openapi.yaml ================================================ openapi: 3.0.0 info: title: Podman Desktop AI Lab API description: API for interacting with the Podman Desktop AI Lab service. version: 0.0.1 servers: - url: http://{host}:{port} description: Podman Desktop AI Lab API server variables: host: default: 127.0.0.1 port: default: '10434' tags: - name: server description: Server information paths: /api/version: get: operationId: getServerVersion tags: - server description: Return the Podman Desktop AI Lab API server version summary: Return the Podman Desktop AI Lab API server version responses: '200': description: The Podman Desktop AI Lab API server version was successfully fetched content: application/json: schema: type: object additionalProperties: false properties: version: type: string required: - version /api/tags: get: operationId: getModels tags: - models description: List models that are available locally summary: List models that are available locally responses: '200': description: The models were successfully fetched content: application/json: schema: $ref: '#/components/schemas/ListResponse' /api/pull: post: operationId: pullModel tags: - models description: | Download a model from the Podman AI Lab catalog. summary: | Download a model from the Podman AI Lab Catalog. 
requestBody: required: true description: Request to pull a model content: application/json: schema: $ref: '#/components/schemas/PullRequest' responses: '200': description: Model was successfully pulled content: application/x-ndjson: schema: $ref: '#/components/schemas/ProgressResponse' /api/show: post: operationId: showModel tags: - models description: | Not implemented, returns an empty object - Show information about a model including details, modelfile, template, parameters, license, and system prompt. summary: | Show information about a model including details, modelfile, template, parameters, license, and system prompt. requestBody: required: true description: Request to show a model content: application/json: schema: $ref: '#/components/schemas/ShowRequest' responses: '200': description: The model's information was successfully fetched content: application/json: schema: $ref: '#/components/schemas/ShowResponse' /api/generate: post: operationId: generateResponse tags: - generate description: | Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. summary: | Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request. requestBody: required: true description: Request to generate a response content: application/json: schema: $ref: '#/components/schemas/GenerateRequest' responses: '200': description: A response was successfully generated for the prompt content: application/json: schema: $ref: '#/components/schemas/GenerateResponse' /api/chat: post: operationId: generateChat tags: - chat - generate description: | Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. 
Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request. summary: | Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using "stream": false. The final response object will include statistics and additional data from the request. requestBody: required: true description: Request to generate a response in a chat content: application/json: schema: $ref: '#/components/schemas/ChatRequest' responses: '200': description: The next message was successfully generated for the chat content: application/json: schema: $ref: '#/components/schemas/ChatResponse' /api/ps: get: operationId: getRunningModels tags: - models description: List running models summary: List running models responses: '200': description: The list of running models was successfully fetched content: application/json: schema: $ref: '#/components/schemas/ProcessResponse' components: schemas: ListResponse: type: object description: Response from a list request properties: models: type: array items: $ref: '#/components/schemas/ListModelResponse' ListModelResponse: type: object description: Response from a list request properties: name: type: string model: type: string modified_at: type: string format: date-time size: type: integer digest: type: string details: $ref: '#/components/schemas/ModelDetails' ProcessResponse: type: object description: Response with a list of running models properties: models: type: array items: $ref: '#/components/schemas/ProcessModelResponse' ProcessModelResponse: type: object description: Running model description properties: name: type: string model: type: string size: type: integer digest: type: string details: $ref: '#/components/schemas/ModelDetails' expires_at: type: string format: date-time size_vram: type: integer ModelDetails: type: object description: Details about a model properties: 
parent_model: type: string format: type: string family: type: string families: type: array items: type: string parameter_size: type: string quantization_level: type: string PullRequest: type: object description: Request to pull a model properties: model: type: string description: The name of the model to pull example: instructlab/granite-7b-lab-GGUF insecure: type: boolean description: | allow insecure connections to the catalog. stream: type: boolean description: | If false the response will be returned as a single response object, rather than a stream of objects required: - model ProgressResponse: type: object description: The response returned from various streaming endpoints properties: status: type: string description: The status of the request digest: type: string description: The SHA256 digest of the blob total: type: integer description: The total size of the task completed: type: integer description: The completed size of the task ShowRequest: type: object description: Request to show a model properties: model: type: string description: The name of the model to show required: - model ShowResponse: type: object description: Response from a show request properties: license: type: string description: The model license modelfile: type: string description: The modelfile content parameters: type: string description: The model parameters template: type: string description: The model template system: type: string description: The model system message/prompt details: $ref: '#/components/schemas/ModelDetails' messages: type: array items: $ref: '#/components/schemas/Message' GenerateRequest: type: object description: Request to generate a response properties: model: type: string description: The model name prompt: type: string description: The prompt to generate a response for suffix: type: string images: type: array items: type: string format: byte description: | A list of base64-encoded images (for multimodal models such as llava) format: type: string description: 
| The format to return a response in. Currently the only accepted value is json system: type: string description: | System message to (overrides what is defined in the Modelfile) template: type: string description: | The prompt template to use (overrides what is defined in the Modelfile) context: type: array items: type: integer description: | The context parameter returned from a previous request to generate, this can be used to keep a short conversational memory example: [] stream: type: boolean description: | If false the response will be returned as a single response object, rather than a stream of objects raw: type: boolean description: | If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API keep_alive: $ref: '#/components/schemas/Duration' required: - model GenerateResponse: type: object description: Response from a generate request properties: model: type: string description: The model name that generated the response created_at: type: string format: date-time description: Timestamp of the response response: type: string description: | The textual response itself. 
When done, empty if the response was streamed, if not streamed, this will contain the full response done: type: boolean description: Specifies if the response is complete context: type: array items: type: integer description: | When done, encoding of the conversation used in this response total_duration: type: number description: When done, time spent generating the response load_duration: type: number description: When done, time spent in nanoseconds loading the model prompt_eval_count: type: integer description: When done, number of tokens in the prompt prompt_eval_duration: type: number description: | When done, time spent in nanoseconds evaluating the prompt eval_count: type: integer description: When done, number of tokens in the response eval_duration: type: number description: | When done, time in nanoseconds spent generating the response ChatRequest: type: object description: Request to generate a response in a chat properties: model: type: string description: The model name messages: type: array items: $ref: '#/components/schemas/Message' description: Messages of the chat - can be used to keep a chat memory stream: type: boolean description: Enable streaming of returned response format: type: string description: Format to return the response in (e.g. 
"json") keep_alive: $ref: '#/components/schemas/Duration' options: $ref: '#/components/schemas/Options' ChatResponse: type: object description: Response from a chat request properties: model: type: string description: The model name created_at: type: string format: date-time description: Timestamp of the response message: $ref: '#/components/schemas/Message' done_reason: type: string description: Reason the model stopped generating text done: type: boolean description: Specifies if the response is complete total_duration: type: number description: Total duration of the request load_duration: type: number description: Load duration of the request prompt_eval_count: type: integer description: Count of prompt evaluations prompt_eval_duration: type: number description: Duration of prompt evaluations eval_count: type: integer description: Count of evaluations eval_duration: type: number description: Duration of evaluations Message: type: object description: A message in a chat properties: role: type: string content: type: string images: type: array items: type: string format: byte Duration: type: string description: A string representing the duration example: "5m" Options: type: object description: | Advanced model and runner options for generation and chat requests properties: num_keep: type: integer description: | Specifies the number of tokens from the beginning of the context ot retain when the context limit is reached. (Default: 4) example: 4 seed: type: integer description: | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) example: -1 num_predict: type: integer description: | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) example: -1 top_k: type: integer description: | Reduces the probability of generating nonsense. A higher value (e.g. 
100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) example: 40 top_p: type: number format: float description: | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) example: 0.9 tfs_z: type: number format: float description: | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) example: 1.0 typical_p: type: number format: float description: | Controls the selection of typical words based on their probability distribution. A higher value (e.g., 0.95) focuses on more typical words, reducing the chance of unusual words being selected. (Default: 1.0) example: 1.0 repeat_last_n: type: integer description: | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) example: 64 temperature: type: number format: float description: | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) example: 0.8 repeat_penalty: type: number format: float description: | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) example: 1.1 presence_penalty: type: number format: float description: | Applies a penalty to tokens that have already appeared in the generated text, encouraging the model to introduce new tokens. A higher value increases this penalty, promoting more varied and less repetitive output. (Default: 0.8) example: 0.8 frequency_penalty: type: number format: float description: | Penalizes tokens based on their frequency in the generated text so far. 
A higher value reduces the likelihood of frequent tokens being generated again, promoting more diverse outputs. (Default: 0.8) example: 0.8 mirostat: type: number format: float description: | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) example: 0 mirostat_tau: type: number format: float description: | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) example: 5.8 mirostat_eta: type: number format: float description: | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) example: 0.1 penalize_newline: type: boolean description: | Determines whether the model should penalize the generation of newlines, which can help control the structure and formatting of the output. (Default: true) example: true stop: type: array items: type: string description: | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile. example: ['AI assistant.'] numa: type: boolean description: | Indicates whether to use Non-Uniform Memory Access (NUMA) for optimizing memory usage and performance on multi-processor systems. (Default: false) example: false num_ctx: type: integer description: | Sets the size of the context window used to generate the next token. (Default: 2048) example: 2048 num_batch: type: integer description: | Specifies the number of batches for processing. (Default: 512) example: 512 num_gpu: type: integer description: | Specifies the number of GPUs to use. A value of -1 uses all available GPUs. 
(Default: -1) example: -1 main_gpu: type: integer description: | Specifies the primary GPU to use for processing. (Default: 0) low_vram: type: boolean description: | Indicates whether to optimize the model for low VRAM usage. (Default: false) example: false f16_kv: type: boolean description: | Indicates whether to use 16-bit floating point precision for key-value pairs, reducing memory usage. (Default: false) example: true logits_all: type: boolean description: | Specifies whether to output logits for all tokens. (Default: false) example: false vocab_only: type: boolean description: | Indicates whether to only load the vocabulary without the full model. (Default: false) example: false use_mmap: type: boolean description: | Determines whether to use memory-mapped files for loading the model, improving performance on large models. (Default: true) example: true use_mlock: type: boolean description: | Determines whether to use memory locking to prevent swapping the model out of RAM. (Default: false) example: false num_thread: type: integer description: | Specifies the number of threads to use for processing. A value of 0 uses all available threads. (Default: 0) example: 0 ================================================ FILE: clean.sh ================================================ rm -rf node_modules packages/backend/node_modules packages/frontend/node_modules ================================================ FILE: commitlint.config.js ================================================ module.exports = { extends: ['@commitlint/config-conventional'] }; ================================================ FILE: docs/proposals/ai-studio.md ================================================ # Motivation Today, there is no notion of ordering between the containers. But we know that we have a dependency between the client application and the container that is running the model. 
The second issue is that there is no concept of starting point for a container so today we rely only on the container being started by the container engine and we know that this is not adequate for the model service container. So this is handled by a kind of dirty fix: the containers are all started in parallel but the client application will fail because the model service is not yet started (as it takes a while), so we are trying to restart the client application until the model service is properly started. The purpose of this change is to propose an update to the ai-lab.yaml so that it is as generic as possible, inspired by the Compose specification. ## Proposed changes Define a condition for the container to be properly started: this would be based on the readinessProbe that can already be defined in a Kubernetes container. In the first iteration, we would support only the ```exec``` field. If ```readinessProbe``` is defined, then we would check for the healthcheck status field to be ```healthy```. So the current chatbot file would be updated from: ```yaml application: type: language name: chatbot description: This is a LLM chatbot application that can interact with a llamacpp model-service containers: - name: chatbot-inference-app contextdir: ai_applications containerfile: builds/Containerfile - name: chatbot-model-service contextdir: model_services containerfile: base/Containerfile model-service: true backend: - llama arch: - arm64 - amd64 - name: chatbot-model-servicecuda contextdir: model_services containerfile: cuda/Containerfile model-service: true backend: - llama gpu-env: - cuda arch: - amd64 ``` to ```yaml application: type: language name: chatbot description: This is a LLM chatbot application that can interact with a llamacpp model-service containers: - name: chatbot-inference-app contextdir: ai_applications containerfile: builds/Containerfile readinessProbe: # added exec: # added command: # added - curl -f localhost:8080 || exit 1 # added - name: 
chatbot-model-service contextdir: model_services containerfile: base/Containerfile model-service: true readinessProbe: # added exec: # added command: # added - curl -f localhost:7860 || exit 1 # added backend: - llama arch: - arm64 - amd64 - name: chatbot-model-service contextdir: model_services containerfile: cuda/Containerfile model-service: true readinessProbe: # added exec: # added command: # added - curl -f localhost:7860 || exit 1 # added backend: - llama gpu-env: - cuda arch: - amd64 ``` From the Podman Desktop API point of view, this would require extending the [ContainerCreateOptions](https://podman-desktop.io/api/interfaces/ContainerCreateOptions) structure to support the HealthCheck option. ================================================ FILE: docs/proposals/state-management.md ================================================ # State management The backend manages and persists the State. The backend pushes new state to the front-end when changes happen, and the front-end can ask for the current value of the state. The front-end uses `readable` stores to expose the state to the different pages. The store listens for new states pushed by the backend (`onMessage`), and asks for the current state at initial time. The pages of the front-end subscribe to the store to get the value of the state in a reactive manner. ## Catalog The catalog is persisted as a file in the user's filesystem. The backend reads the file at startup, and watches the file for changes. The backend updates the state as soon as it detects changes. The front-end uses a `readable` store, which waits for changes on the Catalog state (using `onMessage('new-catalog-state', data)`), and asks for the current state at startup (with `postMessage('ask-catalog-state')`). The interested pages of the front-end subscribe to the store to get the value of the Catalog state in a reactive manner. 
## Pulled applications The front-end initiates the pulling of an application (using `postMessage('pull-application', app-id)`). The backend manages and persists the state of the pulled applications and pushes every update on the state (progression, etc.) (using `postMessage('new-pulled-application-state, app-id, data)`). The front-end uses a `readable` store, which waits for changes on the Pulled Applications state (using `onMessage('new-pulled-application-state)`), and asks for the current state at startup (with `postMessage('ask-pulled-applications-state')`). The interested pages of the front-end subscribe to the store to get the value of the Pulled Applications state in a reactive manner. ## Errors The front-end initiates operations (pull application, etc). When an error happens during an operation, the backend manages and persists the error in a centralized way. The backend pushes new errors (using `postMessage('new-error-state', data)`). Optionally, it can push errors to the core Podman Desktop, to display errors in the notifications system. The front-end uses a `readable` store, which waits for changes on the Errors state (using `onMessage('new-error-state')`), and asks for the current state at startup (using `postMessage('ask-error-state)`). The interested pages of the front-end subscribe to the store to display the errors related to the page. The user can acknowledge an error (using a `postMessage('ack-error', id)`). ================================================ FILE: eslint.config.mjs ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import globals from 'globals'; import js from '@eslint/js'; import typescriptLint from 'typescript-eslint'; import tsParser from '@typescript-eslint/parser'; import svelteParser from 'svelte-eslint-parser'; import importPlugin from 'eslint-plugin-import'; import { fixupConfigRules, fixupPluginRules } from '@eslint/compat'; import { fileURLToPath } from 'node:url'; import path from 'node:path'; import { FlatCompat } from '@eslint/eslintrc'; import unicorn from 'eslint-plugin-unicorn'; import noNull from 'eslint-plugin-no-null'; import sonarjs from 'eslint-plugin-sonarjs'; import etc from 'eslint-plugin-etc'; import svelte from 'eslint-plugin-svelte'; import redundantUndefined from 'eslint-plugin-redundant-undefined'; import simpleImportSort from 'eslint-plugin-simple-import-sort'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const compat = new FlatCompat({ baseDirectory: __dirname, recommendedConfig: js.configs.recommended, allConfig: js.configs.all, }); const TYPESCRIPT_PROJECTS = ['packages/*/tsconfig.json', 'tests/*/tsconfig.json']; export default [ { ignores: [ '*.config.*js', '**/*.config.*js', '**/dist/**/*', '**/test-resources', '**/__mocks__/', '**/coverage/', 'packages/backend/media/**', '**/.svelte-kit/', 'scripts/**', '**/src-generated/', 'tests/playwright/tests/playwright/output/**', ], }, js.configs.recommended, ...typescriptLint.configs.recommended, sonarjs.configs.recommended, 
...svelte.configs['flat/recommended'], ...fixupConfigRules( compat.extends('plugin:import/recommended', 'plugin:import/typescript', 'plugin:etc/recommended'), ), { plugins: { // compliant v9 plug-ins unicorn, // non-compliant v9 plug-ins etc: fixupPluginRules(etc), import: fixupPluginRules(importPlugin), 'no-null': fixupPluginRules(noNull), 'redundant-undefined': fixupPluginRules(redundantUndefined), 'simple-import-sort': fixupPluginRules(simpleImportSort), }, settings: { 'import/resolver': { typescript: true, node: true, 'eslint-import-resolver-custom-alias': { alias: { '/@': './src', '/@gen': './src-generated', }, extensions: ['.ts'], packages: ['packages/*'], }, }, }, }, { linterOptions: { reportUnusedDisableDirectives: 'off', }, languageOptions: { globals: { ...globals.node, }, // parser: tsParser, sourceType: 'module', parserOptions: { extraFileExtensions: ['.svelte'], warnOnUnsupportedTypeScriptVersion: false, project: TYPESCRIPT_PROJECTS, }, }, }, { rules: { eqeqeq: 'error', 'prefer-promise-reject-errors': 'error', semi: ['error', 'always'], 'comma-dangle': ['warn', 'always-multiline'], quotes: [ 'error', 'single', { allowTemplateLiterals: true, }, ], '@typescript-eslint/explicit-function-return-type': 'off', '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_', caughtErrors: 'none' }], '@typescript-eslint/no-var-requires': 'off', '@typescript-eslint/consistent-type-imports': 'error', '@typescript-eslint/no-explicit-any': 'error', '@typescript-eslint/await-thenable': 'error', '@typescript-eslint/no-floating-promises': ['error', { ignoreVoid: false }], '@typescript-eslint/no-misused-promises': 'error', '@typescript-eslint/prefer-optional-chain': 'error', '@typescript-eslint/explicit-function-return-type': 'error', '@typescript-eslint/prefer-nullish-coalescing': [ 'error', { ignoreConditionalTests: true, }, ], '@typescript-eslint/no-require-imports': 'off', // unicorn custom rules 'unicorn/prefer-node-protocol': 'error', 'no-null/no-null': 
'error', 'sonarjs/no-empty-function': 'off', 'sonarjs/deprecation': 'off', 'sonarjs/todo-tag': 'off', 'sonarjs/sonar-no-fallthrough': 'off', /** * Having a semicolon helps the optimizer interpret your code correctly. * This avoids rare errors in optimized code. * @see https://twitter.com/alex_kozack/status/1364210394328408066 */ semi: ['error', 'always'], /** * This will make the history of changes in the hit a little cleaner */ 'comma-dangle': ['warn', 'always-multiline'], /** * Just for beauty */ quotes: ['error', 'single', { allowTemplateLiterals: true }], // disabled import/namespace rule as the plug-in is not fully compatible using the compat mode 'import/namespace': 'off', 'import/no-duplicates': 'error', 'import/first': 'error', 'import/newline-after-import': 'error', 'import/no-extraneous-dependencies': 'error', 'import/no-unresolved': 'off', 'import/default': 'off', 'import/no-named-as-default-member': 'off', 'import/no-named-as-default': 'off', 'sonarjs/cognitive-complexity': 'off', 'sonarjs/no-duplicate-string': 'off', 'sonarjs/no-empty-collection': 'off', 'sonarjs/no-small-switch': 'off', 'sonarjs/no-unused-expressions': 'off', 'etc/no-commented-out-code': 'error', 'etc/no-deprecated': 'off', 'etc/no-commented-out-code': 'off', 'redundant-undefined/redundant-undefined': 'error', 'import/no-extraneous-dependencies': 'error', 'import/no-restricted-paths': [ 'error', { zones: [ { target: './packages/backend/**/*', from: ['./packages/frontend/**/*'], }, { target: './packages/frontend/**/*', from: ['./packages/backend/**/*'], }, ], }, ], // disabled as code in this project is not yet compliant: 'svelte/valid-compile': 'off', 'no-undef': 'off', }, }, { files: ['**/*.svelte'], languageOptions: { parser: svelteParser, ecmaVersion: 5, sourceType: 'script', parserOptions: { parser: tsParser, }, }, rules: { eqeqeq: 'off', 'etc/no-implicit-any-catch': 'off', 'no-inner-declarations': 'off', 'sonarjs/code-eval': 'off', 'sonarjs/different-types-comparison': 'off', 
'sonarjs/prefer-nullish-coalescing': 'off', 'sonarjs/no-nested-template-literals': 'off', 'sonarjs/no-nested-conditional': 'off', '@typescript-eslint/no-unused-vars': 'off', '@typescript-eslint/ban-types': 'off', '@typescript-eslint/no-unused-expressions': 'off', }, }, { files: ['packages/frontend/**'], languageOptions: { globals: { ...Object.fromEntries(Object.entries(globals.node).map(([key]) => [key, 'off'])), ...globals.browser, }, }, }, { files: ['packages/shared/**'], languageOptions: { globals: { ...Object.fromEntries(Object.entries(globals.node).map(([key]) => [key, 'off'])), ...Object.fromEntries(Object.entries(globals.browser).map(([key]) => [key, 'off'])), }, }, }, ]; ================================================ FILE: package.json ================================================ { "name": "ai-lab-monorepo", "displayName": "ai-lab-monorepo", "description": "ai-lab-monorepo", "publisher": "redhat", "version": "1.10.0-next", "license": "Apache-2.0", "private": true, "engines": { "node": ">=24.0.0", "npm": ">=10.2.3" }, "scripts": { "build": "concurrently \"cd packages/frontend && pnpm run build\" \"cd packages/backend && pnpm run build\"", "watch": "concurrently \"cd packages/frontend && pnpm run watch\" \"cd packages/backend && pnpm run watch\"", "format:check": "prettier --check \"**/src/**/*.{ts,svelte}\"", "format:fix": "prettier --write \"**/src/**/*.{ts,svelte}\"", "lint:check": "eslint . --cache", "lint:fix": "eslint . 
--cache --fix", "svelte:check": "svelte-check", "test:backend": "vitest run -r packages/backend --passWithNoTests --coverage", "test:frontend": "vitest -c packages/frontend/vite.config.js run packages/frontend --passWithNoTests --coverage", "test:shared": "vitest run -r packages/shared --passWithNoTests --coverage", "test:unit": "pnpm run test:backend && pnpm run test:shared && pnpm run test:frontend", "test:e2e": "cd tests/playwright && pnpm run test:e2e", "test:e2e:smoke": "cd tests/playwright && pnpm run test:e2e:smoke", "test:e2e:instructlab": "cd tests/playwright && pnpm run test:e2e:instructlab", "typecheck:shared": "tsc --noEmit --project packages/shared", "typecheck:frontend": "tsc --noEmit --project packages/frontend", "typecheck:backend": "cd packages/backend && pnpm run typecheck", "typecheck": "pnpm run typecheck:shared && pnpm run typecheck:frontend && pnpm run typecheck:backend", "prepare": "husky" }, "resolutions": { "string-width": "^4.2.0", "wrap-ansi": "^7.0.0", "postman-code-generators": "1.10.1" }, "lint-staged": { "*.{js,ts,tsx,svelte}": [ "eslint --cache --fix", "prettier --cache --write" ], "*.{md,css,json}": "prettier --write" }, "devDependencies": { "@commitlint/cli": "^20.5.2", "@commitlint/config-conventional": "^20.5.0", "@eslint/compat": "^2.0.5", "@typescript-eslint/eslint-plugin": "^8.59.1", "@typescript-eslint/parser": "^8.59.1", "@vitest/coverage-v8": "^3.2.3", "autoprefixer": "^10.5.0", "commitlint": "^20.5.2", "concurrently": "^9.2.1", "eslint": "^9.39.2", "eslint-import-resolver-custom-alias": "^1.3.2", "eslint-import-resolver-typescript": "^4.3.5", "eslint-plugin-etc": "^2.0.3", "eslint-plugin-import": "^2.31.0", "eslint-plugin-no-null": "^1.0.2", "eslint-plugin-redundant-undefined": "^1.0.0", "eslint-plugin-simple-import-sort": "^13.0.0", "eslint-plugin-sonarjs": "^4.0.3", "eslint-plugin-svelte": "^3.17.1", "eslint-plugin-unicorn": "^64.0.0", "globals": "^17.5.0", "husky": "^9.1.7", "lint-staged": "^16.4.0", "msw": "^2.14.2", 
"prettier": "^3.8.3", "prettier-plugin-svelte": "^3.5.1", "svelte-check": "^4.4.6", "svelte-eslint-parser": "^1.6.0", "typescript": "5.9.3", "typescript-eslint": "^8.59.1", "vite": "^7.3.1", "vitest": "^3.0.5" }, "workspaces": { "packages": [ "packages/*", "tests/*" ] }, "dependencies": { "js-yaml": "^4.1.1", "zod": "^4.3.6" }, "scarfSettings": { "enabled": false }, "pnpm": { "overrides": { "postman-collection>semver": "^7.5.2" }, "ignoredBuiltDependencies": [ "@scarf/scarf", "@tailwindcss/oxide", "esbuild", "postman-code-generators", "svelte-preprocess", "unrs-resolver" ] }, "packageManager": "pnpm@10.12.4+sha512.5ea8b0deed94ed68691c9bad4c955492705c5eeb8a87ef86bc62c74a26b037b08ff9570f108b2e4dbd1dd1a9186fea925e527f141c648e85af45631074680184" } ================================================ FILE: packages/backend/.gitignore ================================================ media /src-generated ================================================ FILE: packages/backend/__mocks__/@podman-desktop/api.js ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ /** * Mock the extension API for vitest. * This file is referenced from vitest.config.js file. 
*/ const plugin = {}; module.exports = plugin; ================================================ FILE: packages/backend/package.json ================================================ { "name": "ai-lab", "displayName": "Podman AI Lab", "description": "Podman AI Lab lets you work with LLMs locally, exploring AI fundamentals, experimenting with models and prompts, and serving models while maintaining data security and privacy.", "version": "1.10.0-next", "icon": "icon.png", "type": "module", "publisher": "redhat", "license": "Apache-2.0", "engines": { "podman-desktop": ">=1.8.0" }, "main": "./dist/extension.cjs", "contributes": { "commands": [ { "command": "ai-lab.navigation.inference.start", "title": "AI Lab: navigate to inference start page", "hidden": true }, { "command": "ai-lab.navigation.recipe.start", "title": "AI Lab: navigate to recipe start page", "hidden": true } ], "configuration": { "title": "AI Lab", "properties": { "ai-lab.models.path": { "type": "string", "format": "folder", "default": "", "description": "Custom path where to download models. Note: The extension must be restarted for changes to take effect. 
(Default is blank)" }, "ai-lab.modelUploadDisabled": { "type": "boolean", "default": false, "description": "Disable the model upload to the podman machine", "hidden": true }, "ai-lab.experimentalGPU": { "type": "boolean", "default": false, "description": "Experimental GPU support for inference servers" }, "ai-lab.apiPort": { "type": "number", "default": 10434, "minimum": 1024, "maximum": 65535, "description": "Port on which the API is listening (requires restart of extension)" }, "ai-lab.inferenceRuntime": { "type": "string", "enum": [ "all", "llama-cpp", "whisper-cpp", "none" ], "description": "Choose the default inferencing runtime for AI Lab" }, "ai-lab.experimentalTuning": { "type": "boolean", "default": false, "description": "Display InstructLab Tuning screens (experimental)", "hidden": true }, "ai-lab.showGPUPromotion": { "type": "boolean", "default": true, "description": "Display GPU promotion banner", "hidden": true } } }, "icons": { "brain-icon": { "description": "Brain icon", "default": { "fontPath": "brain.woff2", "fontCharacter": "\\E001" } } }, "views": { "icons/containersList": [ { "when": "ai-lab-model-id in containerLabelKeys", "icon": "${brain-icon}" } ], "icons/image": [ { "when": "ai-lab-recipe-id in imageLabelKeys", "icon": "${brain-icon}" } ] } }, "scripts": { "generate": "npx openapi-typescript ../../api/openapi.yaml -o src-generated/openapi.ts", "build": "pnpm run generate && vite build", "test": "vitest run --coverage", "test:watch": "vitest watch --coverage", "format:check": "prettier --check \"src/**/*.ts\"", "format:fix": "prettier --write \"src/**/*.ts\"", "watch": "pnpm run generate && npx vite --mode development build -w", "typecheck": "pnpm run generate && tsc --noEmit" }, "dependencies": { "@ai-sdk/mcp": "^1.0.36", "@ai-sdk/openai-compatible": "^2.0.42", "@huggingface/gguf": "^0.4.2", "@huggingface/hub": "^2.11.0", "ai": "^6.0.168", "express": "^5.2.1", "express-openapi-validator": "^5.6.2", "isomorphic-git": "^1.37.6", "js-yaml": 
"^4.1.1", "mustache": "^4.2.0", "openai": "^6.35.0", "postman-code-generators": "^1.14.1", "postman-collection": "^5.3.0", "semver": "^7.7.4", "swagger-ui-dist": "^5.32.5", "swagger-ui-express": "^5.0.1", "systeminformation": "^5.31.5", "xml-js": "^1.6.11" }, "devDependencies": { "@podman-desktop/api": "1.13.0-202409181313-78725a6565", "@ai-sdk/provider": "^3.0.8", "@ai-sdk/provider-utils": "^4.0.24", "@rollup/plugin-replace": "^6.0.3", "@types/express": "^5.0.6", "@types/js-yaml": "^4.0.9", "@types/mustache": "^4.2.6", "@types/node": "^24", "@types/postman-collection": "^3.5.11", "@types/supertest": "^7.2.0", "@types/swagger-ui-dist": "^3.30.5", "@types/swagger-ui-express": "^4.1.8", "openapi-typescript": "^7.13.0", "supertest": "^7.2.2", "vitest": "^3.0.5" } } ================================================ FILE: packages/backend/src/assets/ai.json ================================================ { "version": "1.0", "recipes": [ { "id": "chatbot", "description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications using Streamlit.", "name": "ChatBot", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/chatbot", "readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n![](/assets/chatbot_ui.png) \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot\npodman pod rm chatbot\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. 
In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. 
You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. 
Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain"] }, { "id": "chatbot-pydantic-ai", "description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications with the pydantic framework using Streamlit", "name": "Chatbot PydanticAI", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/chatbot-pydantic-ai", "readme": "# Chatbot Pydantic Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n![](/assets/chatbot_ui.png) \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot Pydantic AI` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot-pydantic-ai.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot-pydantic-ai.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot-pydantic-ai`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the chatbot-pydantic-ai application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot-pydantic-ai\npodman pod rm chatbot-pydantic-ai\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. 
In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot-pydantic-ai\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. 
You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-pydantic-ai from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-pydantic-ai from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot-pydantic-ai applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot-pydantic-ai workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot-pydantic-ai application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-pydantic-ai-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot-pydantic-ai service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot-pydantic-ai\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. 
Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "PydanticAI"] }, { "id": "agents", "description": "This recipe shows how ReAct can be used to create an intelligent music discovery assistant with Spotify API.", "name": "ReAct Agent Application", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/agents", "readme": "# ReAct Agent Application\n\n This recipe demonstrates the ReAct (Reasoning and Acting) framework in action through a music exploration application. ReAct enables AI to think step-by-step about tasks, take appropriate actions, and provide reasoned responses. The application shows how ReAct can be used to create an intelligent music discovery assistant that combines reasoning with Spotify API interactions.\nThe application utilizes [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) for the Model Service and integrates with Spotify's API for music data. 
The recipe uses [Langchain](https://python.langchain.com/docs/get_started/introduction) for the ReAct implementation and [Streamlit](https://streamlit.io/) for the UI layer.\n\n## Spotify API Access\nTo use this application, you'll need Spotify API credentials (follow the link here for documentation https://developer.spotify.com/documentation/web-api):\n- Create a Spotify Developer account\n- Create an application in the Spotify Developer Dashboard (https://developer.spotify.com/documentation/web-api/concepts/apps dont worry about adding web/redirect url use the defaults)\n- Get your Client ID and Client Secret once the app is created (https://developer.spotify.com/dashboard)\n\nThese can be provided through environment variables or the application's UI.\n\n## Try the ReAct Agent Application\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `ReAct Agent` and follow the instructions to start the application.\n\n# Build the Application\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the Pod above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n## Download a model\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well performant mid-sized model with an apache-2.0 license. 
In order to use it with our Model Service we need it converted and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of ways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from [huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\nThe recommended model can be downloaded using the code snippet below:\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/agents\n```\n_A full list of supported open models is forthcoming._ \n\n## Build the Model Service\nThe complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. 
You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\nThe AI Application can be built from the make command:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/agents from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/agents from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled applications.", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain"] }, { "id": "summarizer", "description": "This recipe guides into creating custom LLM-powered summarization applications using Streamlit.", "name": "Summarizer", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/summarizer", "readme": "# Text Summarizer Application\n\n This recipe helps developers start building their own custom LLM enabled summarizer applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. 
You can find an example of the summarizer application below.\n\n![](/assets/summarizer_ui.png) \n\n\n## Try the Summarizer Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Summarizer` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/summarizer.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/summarizer.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. 
\nThe Pod is named `summarizer`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the summarizer application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop summarizer\npodman pod rm summarizer\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/summarizer\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the 
[Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/summarizer from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/summarizer from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the summarizer application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled summarizer applications. 
\n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample summarizer workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the summarizer application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/summarizer-bootc:latest\n```\n\nUpon a reboot, you'll see that the summarizer service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status summarizer\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. 
This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain"] }, { "id": "codegeneration", "description": "This recipes showcases how to leverage LLM to build your own custom code generation application.", "name": "Code Generation", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "generator", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/codegen", "readme": "# Code Generation Application\n\n This recipe helps developers start building their own custom LLM enabled code generation applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the code generation application below.\n\n![](/assets/codegen_ui.png) \n\n\n## Try the Code Generation Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Code Generation` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/codegen.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/codegen.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `codegen`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the codegen application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop codegen\npodman pod rm codegen\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-8b-code-instruct](https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k). This is a well\nperformant mid-sized model with an apache-2.0 license fine tuned for code generation. 
In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-8b-code-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k-GGUF.\n\nThere are a number of options for quantization level, but we recommend `Q4_K_M`. \n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k-GGUF/resolve/main/granite-8b-code-instruct.Q4_K_M.gguf\ncd ../recipes/natural_language_processing/codegen\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where it's served. 
You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/codegen from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/codegen from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the code generation application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled code generation applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample code generation workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the code generation application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/codegen-bootc:latest\n```\n\nUpon a reboot, you'll see that the codegen service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status codegen\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. 
Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF", "hf.ibm-granite.granite-8b-code-instruct" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain"] }, { "id": "rag", "description": "This application illustrates how to integrate RAG (Retrieval Augmented Generation) into LLM applications enabling to interact with your own documents.", "name": "RAG Chatbot", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/rag", "readme": "# RAG (Retrieval Augmented Generation) Chat Application\n\nThis demo provides a simple recipe to help developers start to build out their own custom RAG (Retrieval Augmented Generation) applications. It consists of three main components; the Model Service, the Vector Database and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\nIn order for the LLM to interact with our documents, we need them stored and available in such a manner that we can retrieve a small subset of them that are relevant to our query. To do this we employ a Vector Database alongside an embedding model. The embedding model converts our documents into numerical representations, vectors, such that similarity searches can be easily performed. The Vector Database stores these vectors for us and makes them available to the LLM. In this recipe we can use [chromaDB](https://docs.trychroma.com/) or [Milvus](https://milvus.io/) as our Vector Database.\n\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with our Model Service and we use [Streamlit](https://streamlit.io/) for our UI layer. Below please see an example of the RAG application. \n\n![](/assets/rag_ui.png)\n\n\n## Try the RAG chat application\n\n_COMING SOON to AI LAB_\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. 
To try it out, open `Recipes Catalog` -> `RAG Chatbot` and follow the instructions to start the application.\n\nIf you prefer building and running the application from terminal, please run the following commands from this directory.\n\nFirst, build application's meta data and run the generated Kubernetes YAML which will spin up a Pod along with a number of containers:\n```\nmake quadlet\npodman kube play build/rag.yaml\n```\n\nThe Pod is named `rag`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\n\nTo stop and remove the Pod, run:\n```\npodman pod stop rag\npodman pod rm rag\n```\n\nOnce the Pod is running, please refer to the section below to [interact with the RAG chatbot application](#interact-with-the-ai-application).\n\n# Build the Application\n\nIn order to build this application we will need two models, a Vector Database, a Model Service and an AI Application. \n\n* [Download models](#download-models)\n* [Deploy the Vector Database](#deploy-the-vector-database)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n### Download models\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF). 
This is a well\nperformant mid-sized model with an apache-2.0 license that has been quantized and served into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/rag\n```\n\n_A full list of supported open models is forthcoming._ \n\nIn addition to the LLM, RAG applications also require an embedding model to convert documents between natural language and vector representations. For this demo we will use [`BAAI/bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) it is a fairly standard model for this use case and has an MIT license. \n\nThe code snippet below can be used to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model and store it in your `models/` directory. \n\n```python \nfrom huggingface_hub import snapshot_download\nsnapshot_download(repo_id=\"BAAI/bge-base-en-v1.5\",\n cache_dir=\"models/\",\n local_files_only=False)\n```\n\n### Deploy the Vector Database \n\nTo deploy the Vector Database service locally, simply use the existing ChromaDB or Milvus image. The Vector Database is ephemeral and will need to be re-populated each time the container restarts. 
When implementing RAG in production, you will want a long running and backed up Vector Database.\n\n\n#### ChromaDB\n```bash\npodman pull chromadb/chroma\n```\n```bash\npodman run --rm -it -p 8000:8000 chroma\n```\n#### Milvus\n```bash\npodman pull milvusdb/milvus:master-20240426-bed6363f\n```\n```bash\npodman run -it \\\n --name milvus-standalone \\\n --security-opt seccomp:unconfined \\\n -e ETCD_USE_EMBED=true \\\n -e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \\\n -e COMMON_STORAGETYPE=local \\\n -v $(pwd)/volumes/milvus:/var/lib/milvus \\\n -v $(pwd)/embedEtcd.yaml:/milvus/configs/embedEtcd.yaml \\\n -p 19530:19530 \\\n -p 9091:9091 \\\n -p 2379:2379 \\\n --health-cmd=\"curl -f http://localhost:9091/healthz\" \\\n --health-interval=30s \\\n --health-start-period=90s \\\n --health-timeout=20s \\\n --health-retries=3 \\\n milvusdb/milvus:master-20240426-bed6363f \\\n milvus run standalone 1> /dev/null\n```\nNote: For running the Milvus instance, make sure you have the `$(pwd)/volumes/milvus` directory and `$(pwd)/embedEtcd.yaml` file as shown in this repository. These are required by the database for its operations.\n\n\n### Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built with the following code snippet:\n\n```bash\ncd model_servers/llamacpp_python\npodman build -t llamacppserver -f ./base/Containerfile .\n```\n\n\n### Deploy the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
You can start your local Model Service using the following Podman command:\n```\npodman run --rm -it \\\n -p 8001:8001 \\\n -v Local/path/to/locallm/models:/locallm/models \\\n -e MODEL_PATH=models/ \\\n -e HOST=0.0.0.0 \\\n -e PORT=8001 \\\n llamacppserver\n```\n\n### Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application image in the `rag-langchain/` directory.\n\n```bash\ncd rag\nmake APP_IMAGE=rag build\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.\n\nThere also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database. \n\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 \\\n-e MODEL_ENDPOINT=http://10.88.0.1:8001 \\\n-e VECTORDB_HOST=10.88.0.1 \\\n-v Local/path/to/locallm/models/:/rag/models \\\nrag \n```\n\n### Interact with the AI Application\n\nEverything should now be up an running with the rag application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled RAG applications. \n\n### Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample RAG chatbot workload as a service that starts when a system is booted, cd into this folder\nand run:\n\n\n```\nmake BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc\n```\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the RAG chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```\nbootc switch quay.io/your/rag-bootc:latest\n```\n\nUpon a reboot, you'll see that the RAG chatbot service is running on the system.\n\nCheck on the service with\n\n```\nssh user@bootc-system-ip\nsudo systemctl status rag\n```\n\n#### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. 
This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n##### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n\n### Makefile variables\n\nThere are several [Makefile variables](../../common/README.md) defined within each `recipe` Makefile which can be\nused to override defaults for a variety of make targets.\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain", "vectordb"] }, { "id": "rag-nodejs", "description": "This application illustrates how to integrate RAG (Retrieval Augmented Generation) into LLM applications written in Node.js enabling to interact with your own documents.", "name": "Node.js RAG Chatbot", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/rag-nodejs", "readme": "# RAG (Retrieval Augmented Generation) Chat Application\n\nThis demo provides a simple recipe to help Node.js developers start to build out their own custom RAG (Retrieval Augmented Generation) 
applications. It consists of three main components; the Model Service, the Vector Database and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\nIn order for the LLM to interact with our documents, we need them stored and available in such a manner that we can retrieve a small subset of them that are relevant to our query. To do this we employ a Vector Database alongside an embedding model. The embedding model converts our documents into numerical representations, vectors, such that similarity searches can be easily performed. The Vector Database stores these vectors for us and makes them available to the LLM. In this recipe we can use [chromaDB](https://docs.trychroma.com/) as our Vector Database.\n\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://js.langchain.com/docs/introduction/) package to simplify communication with our Model Service and we use [React Chatbotify](https://react-chatbotify.com/) and [Next.js](https://nextjs.org/) for our UI layer. Below please see an example of the RAG application. \n\n![](/assets/rag_nodejs.png)\n\n\n## Try the RAG chat application\n\n_COMING SOON to AI LAB_\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. 
To try it out, open `Recipes Catalog` -> `RAG Node.js Chatbot` and follow the instructions to start the application.\n\nIf you prefer building and running the application from terminal, please run the following commands from this directory.\n\nFirst, build application's meta data and run the generated Kubernetes YAML which will spin up a Pod along with a number of containers:\n```\nmake quadlet\npodman kube play build/rag-nodejs.yaml\n```\n\nThe Pod is named `rag-nodejs`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\n\nTo stop and remove the Pod, run:\n```\npodman pod stop rag-nodejs\npodman pod rm rag-nodejs\n```\n\nOnce the Pod is running, please refer to the section below to [interact with the RAG chatbot application](#interact-with-the-ai-application).\n\n# Build the Application\n\nIn order to build this application we will need two models, a Vector Database, a Model Service and an AI Application. \n\n* [Download models](#download-models)\n* [Deploy the Vector Database](#deploy-the-vector-database)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n### Download models\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF). 
This is a well\nperformant mid-sized model with an apache-2.0 license that has been quantized and served into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/rag-nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n### Deploy the Vector Database \n\nTo deploy the Vector Database service locally, simply use the existing ChromaDB. The Vector Database is ephemeral and will need to be re-populated each time the container restarts. When implementing RAG in production, you will want a long running and backed up Vector Database.\n\n\n#### ChromaDB\n```bash\npodman pull chromadb/chroma\n```\n```bash\npodman run --rm -it -p 8000:8000 chromadb/chroma\n```\n\n### Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built with the following code snippet:\n\n```bash\ncd model_servers/llamacpp_python\npodman build -t llamacppserver -f ./base/Containerfile .\n```\n\n\n### Deploy the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
You can start your local Model Service using the following Podman command:\n```\npodman run --rm -it \\\n -p 8001:8001 \\\n -v Local/path/to/locallm/models:/locallm/models \\\n -e MODEL_PATH=models/ \\\n -e HOST=0.0.0.0 \\\n -e PORT=8001 \\\n llamacppserver\n```\n\n### Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application image in the `rag-nodejs/` directory.\n\n```bash\ncd rag-nodejs\nmake APP_IMAGE=rag-nodejs build\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.\n\nThere also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database. \n\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 \\\n-e MODEL_ENDPOINT=http://10.88.0.1:8001 \\\n-e VECTORDB_HOST=10.88.0.1 \\\n-v Local/path/to/locallm/models/:/rag/models \\\nrag-nodejs \n```\n\n### Interact with the AI Application\n\nEverything should now be up an running with the rag application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled RAG applications. \n\n### Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample RAG chatbot workload as a service that starts when a system is booted, cd into this folder\nand run:\n\n\n```\nmake BOOTC_IMAGE=quay.io/your/rag-nodejs-bootc:latest bootc\n```\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 BOOTC_IMAGE=quay.io/your/rag-nodejs-bootc:latest bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the RAG Node.js chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```\nbootc switch quay.io/your/rag-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the RAG Node.js chatbot service is running on the system.\n\nCheck on the service with\n\n```\nssh user@bootc-system-ip\nsudo systemctl status rag-nodejs\n```\n\n#### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. 
This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n##### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n\n### Makefile variables\n\nThere are several [Makefile variables](../../common/README.md) defined within each `recipe` Makefile which can be\nused to override defaults for a variety of make targets.\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["javascript"], "frameworks": ["react", "langchain", "vectordb"] }, { "id": "chatbot-java-quarkus", "description": "This is a Java Quarkus-based recipe demonstrating how to create an AI-powered chat applications.", "name": "Java-based ChatBot (Quarkus)", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/chatbot-java-quarkus", "readme": "# Java-based chatbot application\n\nThis application implements a simple chatbot backed by Quarkus and its\nLangChain4j extension. 
The UI communicates with the backend application via\nweb sockets and the backend uses the OpenAI API to talk to the model served\nby Podman AI Lab.\n\nDocumentation for Quarkus+LangChain4j can be found at\nhttps://docs.quarkiverse.io/quarkus-langchain4j/dev/.", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["java"], "frameworks": ["quarkus", "langchain4j"] }, { "id": "chatbot-javascript-react", "description": "This is a NodeJS based recipe demonstrating how to create an AI-powered chat applications.", "name": "Node.js based ChatBot", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/chatbot-nodejs", "readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications using Node.js and JavaScript. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's]( https://js.langchain.com/docs/introduction) JavaScript package to simplify communication with the Model Service and uses [react-chatbotify](https://react-chatbotify.com/) for the UI layer. 
You can find an example of the chat application below.\n\n![](/assets/chatbot_nodejs_ui.png) \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Node.js based Chatbot` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot-nodejs.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot-nodejs.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. 
\nThe Pod is named `chatbot-nodejs`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot-nodejs\npodman pod rm chatbot-nodejs\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot-nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the 
[Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-nodejs from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-nodejs from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. 
\n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot-nodejs\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. 
This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "hf.ibm-research.granite-3.2-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["javascript"], "frameworks": ["react", "langchain"] }, { "id": "function-calling", "description": "This recipes guides into multiple function calling use cases, showing the ability to structure data and chain multiple tasks, using Streamlit.", "name": "Function calling", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/function_calling", "readme": "# Function Calling Application\n\n This recipe helps developers start building their own custom function calling enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n![](/assets/chatbot_ui.png) \n\n\n## Try the Function Calling Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Function Calling` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. \nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot\npodman pod rm chatbot\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). 
This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. 
You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. 
Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "langchain"] }, { "id": "function-calling-nodejs", "description": "This recipes guides into multiple function calling use cases, showing the ability to structure data and chain multiple tasks, using Streamlit.", "name": "Node.js Function calling", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/function-calling-nodejs", "readme": "# Function Calling Application\n\n This recipe helps developers start building their own AI applications with function calling capabilities. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. 
There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://js.langchain.com/v0.2/docs/introduction/) Typescript package to simplify communication with the Model Service and [langgraph.js](https://langchain-ai.github.io/langgraphjs/) to enable the LLM to call functions. It uses [fastify](https://fastify.dev/) as the backend-server and chart.js to plot the weather data returned. You can find an example of the chat application below.\n\n![](/assets/function_calling_nodejs_ui.png)\n\n\n## Try the Function Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `function-calling-nodejs` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/function-calling-nodejs.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/function-calling-nodejs.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `function-calling-nodejs`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. 
\nPlease refer to the section below for more details about [interacting with the function calling application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop function-calling-nodejs\npodman pod rm function-calling-nodejs\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/function-calling-nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the function calling application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled function calling applications.\n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/function-calling-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status function-calling-nodejs\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. 
Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n", "recommended": [ "hf.ibm-granite.granite-4.0-micro-GGUF", "hf.ibm-granite.granite-4.0-tiny-GGUF", "hf.ibm-granite.granite-3.3-8b-instruct-GGUF" ], "backend": "llama-cpp", "languages": ["javascript"], "frameworks": ["langchain.js", "langgraph", "fastify"] }, { "id": "graph-rag", "description": "This demo provides a recipe to build out a custom Graph RAG (Graph Retrieval Augmented Generation) application using the repo LightRag which abstracts Microsoft's GraphRag implementation. It consists of two main components; the Model Service, and the AI Application with a built in Database.", "name": "Graph RAG Chat Application", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/graph-rag", "readme": "# Graph RAG (Retrieval Augmented Generation) Chat Application\nThis demo provides a recipe to build out a custom Graph RAG (Graph Retrieval Augmented Generation) application using the repo LightRag which abstracts Microsoft's GraphRag implementation. 
It consists of two main components; the Model Service, and the AI Application with a built in Database.\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\nLightRag simplifies development by handling the Vectordb setup automatically, while also offering experienced developers the flexibility to choose from various Vectordb options based on their preferences for usability and scalability.\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with our Model Service and we use [Streamlit](https://streamlit.io/) for our UI layer. Below please see an example of the RAG application. \n\n## Try the RAG chat application\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Graph Rag` and follow the instructions to start the application.\n\n## Models that work with this Recipe\nNot all models work with this Recipe try out mistral or llama models! \n\n# Build the Application\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the Pod above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command builds the application's metadata and generates Kubernetes YAML at `./build/graph-rag.yaml` to spin up a Pod that can then be launched locally. Try it with:\n```\nmake quadlet\npodman kube play build/graph-rag.yaml\n```\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `graph-rag`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. Please refer to the section below for more details about [interacting with the Graph Rag application](#interact-with-the-ai-application).\nTo stop and remove the Pod, run:\n```\npodman pod stop graph-rag\npodman pod rm graph-rag\n```\n\n## Download a model\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well performant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). 
There are a number of ways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from [huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\nThe recommended model can be downloaded using the code snippet below:\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/graph-rag\n```\n_A full list of supported open models is forthcoming._ \n\n## Build the Model Service\nThe complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\nThe AI Application can be built from the make command:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/graph-rag from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\nMake sure the Model Service is up and running before starting this container image. 
When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/graph-rag from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled graph-rag applications. \n\n## Embed the AI Application in a Bootable Container Image\nTo build a bootable container image that includes this sample graph-rag workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n```\nmake ARCH=x86_64 bootc\n```\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the graph-rag application, it's as simple as ssh-ing into the bootc system and running:\n```bash\nbootc switch quay.io/ai-lab/graph-rag-bootc:latest\n```\nUpon a reboot, you'll see that the graph-rag service is running on the system. 
Check on the service with:\n```bash\nssh user@bootc-system-ip\nsudo systemctl status graph-rag\n```\n\n### What are bootable containers?\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than at _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system. Bootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization tools. Might I suggest [podman](https://podman.io/)?\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI image registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think factories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\nYou can convert a bootc image to a bootable disk image using the [quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\nDefault image types can be set via the DISK_TYPE Makefile variable.\n`make bootc-image-builder DISK_TYPE=ami`", "recommended": [], "backend": "llama-cpp", "languages": ["python"], "frameworks": ["streamlit", "lightrag"] }, { "id": "audio_to_text", "description": "This application demonstrate how to use LLM for transcripting an audio into text.", "name": "Audio to Text", "repository": 
"https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "generator", "categories": ["audio"], "basedir": "recipes/audio/audio_to_text", "readme": "# Audio to Text Application\n\nThis recipe helps developers start building their own custom AI enabled audio transcription applications. It consists of two main components: the Model Service and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`whisper-cpp`](https://github.com/ggerganov/whisper.cpp.git) and its included Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/whispercpp/base/Containerfile`](/model_servers/whispercpp/base/Containerfile).\n\nThe AI Application will connect to the Model Service via an API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the audio to text application below.\n\n\n![](/assets/whisper.png) \n\n## Try the Audio to Text Application:\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Audio to Text` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the application above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n * [Input audio files](#input-audio-files)\n\n## Download a model\n\nIf you are just getting started, we recommend using [ggerganov/whisper.cpp](https://huggingface.co/ggerganov/whisper.cpp).\nThis is a well performant model with an MIT license.\nIt's simple to download a pre-converted whisper model from [huggingface.co](https://huggingface.co)\nhere: https://huggingface.co/ggerganov/whisper.cpp. There are a number of options, but we recommend to start with `ggml-small.bin`.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin\ncd ../recipes/audio/audio_to_text\n```\n\n_A full list of supported open models is forthcoming._\n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [whispercpp model-service document](../../../model_servers/whispercpp/README.md).\n\n```bash\n# from path model_servers/whispercpp from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/whispercpp/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. 
You can start your local Model Service using the following `make` command from `model_servers/whispercpp` set with reasonable defaults:\n\n```bash\n# from path model_servers/whispercpp from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application\nimage from the [`audio-to-text/`](./) directory.\n\n```bash\n# from path recipes/audio/audio_to_text from repo containers/ai-lab-recipes\npodman build -t audio-to-text app\n```\n### Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image.\nWhen starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`.\nThis could be any appropriately hosted Model Service (running locally or in the cloud) using a compatible API.\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001/inference audio-to-text \n```\n\n### Interact with the AI Application\n\nOnce the streamlit application is up and running, you should be able to access it at `http://localhost:8501`.\nFrom here, you can upload audio files from your local machine and translate the audio files as shown below.\n\nBy using this recipe and getting this starting point established,\nusers should now have an easier time customizing and building their own AI enabled applications.\n\n#### Input audio files\n\nWhisper.cpp requires as an input 16-bit WAV audio files.\nTo convert your input audio files to 16-bit WAV format you can use `ffmpeg` like this:\n\n```bash\nffmpeg -i -ar 16000 -ac 1 -c:a pcm_s16le \n```\n", "recommended": ["hf.ggerganov.whisper.cpp"], "backend": "whisper-cpp", "languages": ["python"], "frameworks": ["streamlit"] }, { "id": "object_detection", "description": "This recipe illustrates how to use LLM to interact 
with images and build object detection applications.", "name": "Object Detection", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "generator", "categories": ["computer-vision"], "basedir": "recipes/computer_vision/object_detection", "readme": "# Object Detection\n\nThis recipe helps developers start building their own custom AI enabled object detection applications. It consists of two main components: the Model Service and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use our FastAPI [`object_detection_python`](../../../model_servers/object_detection_python/src/object_detection_server.py) model server. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/object_detection_python/base/Containerfile`](/model_servers/object_detection_python/base/Containerfile).\n\nThe AI Application will connect to the Model Service via an API. The recipe relies on [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the object detection application below.\n\n![](/assets/object_detection.png) \n\n## Try the Object Detection Application:\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Object Detection` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the application above is built, run, and what purpose it serves in the overall application. All the Model Server elements of the recipe use a central Model Server [Makefile](../../../model_servers/common/Makefile.common) that includes variables populated with default values to simplify getting started. 
Currently we do not have a Makefile for the Application elements of the Recipe, but this is coming soon, and will leverage the recipes common [Makefile](../../common/Makefile.common) to provide variable configuration and reasonable defaults to this Recipe's application.\n\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n## Download a model\n\nIf you are just getting started, we recommend using [facebook/detr-resnet-101](https://huggingface.co/facebook/detr-resnet-101).\nThis is a well performant model with an Apache-2.0 license.\nIt's simple to download a copy of the model from [huggingface.co](https://huggingface.co)\n\nYou can use the `download-model-facebook-detr-resnet-101` make target in the `model_servers/object_detection_python` directory to download and move the model into the models directory for you:\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\n make download-model-facebook-detr-resnet-101\n```\n\n## Build the Model Service\n\nYou can build the Model Service from the [object_detection_python model-service directory](../../../model_servers/object_detection_python).\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake build\n```\n\nCheckout the [Makefile](../../../model_servers/object_detection_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where it's served. 
You can start your local Model Service using the following `make` command from the [`model_servers/object_detection_python`](../../../model_servers/object_detection_python) directory, which will be set with reasonable defaults:\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake run\n```\n\nAs stated above, by default the model service will use [`facebook/detr-resnet-101`](https://huggingface.co/facebook/detr-resnet-101). However you can use other compatible models. Simply pass the new `MODEL_NAME` and `MODEL_PATH` to the make command. Make sure the model is downloaded and exists in the [models directory](../../../models/):\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake MODEL_NAME=facebook/detr-resnet-50 MODEL_PATH=/models/facebook/detr-resnet-50 run\n```\n\n## Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application\nimage from the [`object_detection/`](./) recipe directory.\n\n```bash\n# from path recipes/computer_vision/object_detection from repo containers/ai-lab-recipes\npodman build -t object_detection_client .\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image.\nWhen starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`.\nThis could be any appropriately hosted Model Service (running locally or in the cloud) using a compatible API.\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8000/detection object_detection_client\n```\n\n### Interact with the AI Application\n\nOnce the client is up and running, you should be able to access it at `http://localhost:8501`. 
From here you can upload images from your local machine and detect objects in the image as shown below. \n\nBy using this recipe and getting this starting point established,\nusers should now have an easier time customizing and building their own AI enabled applications.\n", "recommended": ["hf.facebook.detr-resnet-101"], "backend": "none", "languages": ["python"], "frameworks": ["streamlit"] }, { "id": "chatbot-llama-stack", "description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications using Streamlit and llama-stack.", "name": "ChatBot using Llama Stack", "repository": "https://github.com/containers/ai-lab-recipes", "ref": "v1.8.0", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "recipes/natural_language_processing/chatbot-llama-stack", "readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications.\n\n There are a few options today for local Model Serving, but this recipe will use [`Llama Stack`](https://llama-stack.readthedocs.io/en/latest/).\n\n The AI Application will connect to the Model Service via its API. The recipe relies on [Llama Stack Client Python SDK](https://github.com/meta-llama/llama-stack-client-python) to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. \n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. 
To try it out, open `Recipes Catalog` -> `Chatbot using Llama Stack` and follow the instructions to start the application.\n", "backend": "llama-stack", "languages": ["python"], "frameworks": ["streamlit", "llama-stack"] } ], "models": [ { "id": "hf.mistralai.mistral-small-3.2-24b-instruct-2506", "name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "description": "Mistral-Small-3.2-24B-Instruct-2506 is a minor update of [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\n\r\nSmall-3.2 improves in the following categories:\r\n- **Instruction following**: Small-3.2 is better at following precise instructions\r\n- **Repetition errors**: Small-3.2 produces less infinite generations or repetitive answers\r\n- **Function calling**: Small-3.2's function calling template is more robust (see [here](https://github.com/mistralai/mistral-common/blob/535b4d0a0fc94674ea17db6cf8dc2079b81cbcfa/src/mistral_common/tokens/tokenizers/instruct.py#L778) and [examples](#function-calling))\r\n\r\nIn all other categories Small-3.2 should match or slightly improve compared to [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\n\r\n## Key Features\r\n- same as [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503#key-features)\r\n\r\n## Benchmark Results\r\nWe compare Mistral-Small-3.2-24B to [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\nFor more comparison against other models of similar size, please check [Mistral-Small-3.1's Benchmarks'](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503#benchmark-results)\r\n\r\n### Text \r\n#### Instruction Following / Chat / Tone\r\n| Model | Wildbench v2 | Arena Hard v2 | IF (Internal; accuracy) |\r\n|-------|---------------|---------------|------------------------|\r\n| Small 3.1 24B Instruct | 55.6% | 19.56% | 
82.75% |\r\n| **Small 3.2 24B Instruct** | **65.33%** | **43.1%** | **84.78%** |\r\n\r\n#### Infinite Generations\r\nSmall 3.2 reduces infinite generations by 2x on challenging, long and repetitive prompts.\r\n| Model | Infinite Generations (Internal; Lower is better) |\r\n|-------|-------|\r\n| Small 3.1 24B Instruct | 2.11% |\r\n| **Small 3.2 24B Instruct** | **1.29%** |\r\n\r\n#### STEM\r\n| Model | MMLU | MMLU Pro (5-shot CoT) | MATH | GPQA Main (5-shot CoT) | GPQA Diamond (5-shot CoT) | MBPP Plus - Pass@5 | HumanEval Plus - Pass@5 | SimpleQA (TotalAcc) |\r\n|-------|------|---------------------|------|------------------------|---------------------------|-------------------|-------------------------|-------------------|\r\n| Small 3.1 24B Instruct | 80.62% | 66.76% | 69.30% | 44.42% | 45.96% | 74.63% | 88.99% | 10.43% |\r\n| **Small 3.2 24B Instruct** | 80.50% | **69.06%** | 69.42% | 44.22% | 46.13% | **78.33%** | **92.90%** | **12.10%** |\r\n\r\n### Vision\r\n| Model | MMMU | Mathvista | ChartQA | DocVQA | AI2D |\r\n|-------|------|-----------|---------|--------|------|\r\n| Small 3.1 24B Instruct | **64.00%** | **68.91%** | 86.24% | 94.08% | 93.72% |\r\n| **Small 3.2 24B Instruct** | 62.50% | 67.09% | **87.4%** | 94.86% | 92.91% |\r\n\r\n## Usage\r\nThe model can be used with the following frameworks:\r\n- [`vllm (recommended)`](https://github.com/vllm-project/vllm)\r\n- [`transformers`](https://github.com/huggingface/transformers)\r\n\r\n**Note 1**: We recommend using a relatively low temperature, such as `temperature=0.15`.\r\n**Note 2**: Add a system prompt from [SYSTEM_PROMPT.txt](https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506/blob/main/SYSTEM_PROMPT.txt) for best results.\r\n\r\n### vLLM (recommended)\r\n#### Installation\r\n```\r\npip install vllm --upgrade\r\n```\r\nCheck installation:\r\n```\r\npython -c \"import mistral_common; print(mistral_common.__version__)\"\r\n```\r\n#### Serve\r\n```\r\nvllm serve 
mistralai/Mistral-Small-3.2-24B-Instruct-2506 --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice --limit_mm_per_prompt 'image=10' --tensor-parallel-size 2\r\n```\r\nRequires ~55 GB GPU RAM in bf16/fp16.\r\n\r\n#### Function Calling, Vision Reasoning & Instruction Following\r\nSupports multi-modal reasoning, function/tool calls, and precise instruction following using vLLM API or Transformers. See examples in original README.\r\n\r\n### Transformers\r\nInstall:\r\n```\r\npip install mistral-common --upgrade\r\n```\r\nUse `MistralTokenizer` and `Mistral3ForConditionalGeneration` with the system prompt and optional images for reasoning. Multi-modal inputs and outputs supported. Refer to Python snippets for examples of instruction following, vision reasoning, and function calls.", "license": "Apache-2.0", "url": "https://huggingface.co/unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF/resolve/main/Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf", "memory": 14300000000, "sha256": "a3cc56310807ed0d145eaf9f018ccda9ae7ad8edb41ec870aa2454b0d4700b3c", "backend": "llama-cpp", "properties": { "jinja": "true" } }, { "id": "hf.openai.gpt-oss-20b", "name": "openai/gpt-oss-20b (Unsloth quantization)", "description": "\r\n# Welcome to the gpt-oss series, [OpenAI’s open-weight models](https://openai.com/open-models) designed for powerful reasoning, agentic tasks, and versatile developer use cases.\r\n\r\nWe’re releasing two flavors of the open models:\r\n- `gpt-oss-120b` — for production, general purpose, high reasoning use cases that fits into a single H100 GPU (117B parameters with 5.1B active parameters)\r\n- `gpt-oss-20b` — for lower latency, and local or specialized use cases (21B parameters with 3.6B active parameters)\r\n\r\nBoth models were trained on our [harmony response format](https://github.com/openai/harmony) and should only be used with the harmony format as it will not work correctly 
otherwise.\r\n\r\n> [!NOTE]\r\n> This model card is dedicated to the smaller `gpt-oss-20b` model. Check out [`gpt-oss-120b`](https://huggingface.co/openai/gpt-oss-120b) for the larger model.\r\n\r\n# Highlights\r\n\r\n* **Permissive Apache 2.0 license:** Build freely without copyleft restrictions or patent risk—ideal for experimentation, customization, and commercial deployment.\r\n* **Configurable reasoning effort:** Easily adjust the reasoning effort (low, medium, high) based on your specific use case and latency needs.\r\n* **Full chain-of-thought:** Gain complete access to the model’s reasoning process, facilitating easier debugging and increased trust in outputs. It’s not intended to be shown to end users.\r\n* **Fine-tunable:** Fully customize models to your specific use case through parameter fine-tuning.\r\n* **Agentic capabilities:** Use the models’ native capabilities for function calling, [web browsing](https://github.com/openai/gpt-oss/tree/main?tab=readme-ov-file#browser), [Python code execution](https://github.com/openai/gpt-oss/tree/main?tab=readme-ov-file#python), and Structured Outputs.\r\n* **Native MXFP4 quantization:** The models are trained with native MXFP4 precision for the MoE layer, making `gpt-oss-120b` run on a single H100 GPU and the `gpt-oss-20b` model run within 16GB of memory.\r\n\r\n---\r\n\r\n# Inference examples\r\n\r\n## Transformers\r\nYou can use `gpt-oss-120b` and `gpt-oss-20b` with Transformers. If you use the Transformers chat template, it will automatically apply the [harmony response format](https://github.com/openai/harmony). 
If you use `model.generate` directly, you need to apply the harmony format manually using the chat template or use our [openai-harmony](https://github.com/openai/harmony) package.\r\n\r\nTo get started, install the necessary dependencies:\r\n```\r\npip install -U transformers kernels torch \r\n```\r\n\r\n```py\r\nfrom transformers import pipeline\r\nimport torch\r\n\r\nmodel_id = \"openai/gpt-oss-20b\"\r\n\r\npipe = pipeline(\r\n \"text-generation\",\r\n model=model_id,\r\n torch_dtype=\"auto\",\r\n device_map=\"auto\",\r\n)\r\n\r\nmessages = [\r\n {\"role\": \"user\", \"content\": \"Explain quantum mechanics clearly and concisely.\"},\r\n]\r\n\r\noutputs = pipe(\r\n messages,\r\n max_new_tokens=256,\r\n)\r\nprint(outputs[0][\"generated_text\"][-1])\r\n```\r\n\r\n## vLLM\r\nvLLM recommends using [uv](https://docs.astral.sh/uv/) for Python dependency management. You can spin up an OpenAI-compatible webserver:\r\n```\r\nuv pip install --pre vllm==0.10.1+gptoss \\\r\n --extra-index-url https://wheels.vllm.ai/gpt-oss/ \\\r\n --extra-index-url https://download.pytorch.org/whl/nightly/cu128 \\\r\n --index-strategy unsafe-best-match\r\n\r\nvllm serve openai/gpt-oss-20b\r\n```\r\n\r\n## PyTorch / Triton\r\nSee [reference implementations](https://github.com/openai/gpt-oss?tab=readme-ov-file#reference-pytorch-implementation).\r\n\r\n## Ollama\r\n```bash\r\n# gpt-oss-20b\r\nollama pull gpt-oss:20b\r\nollama run gpt-oss:20b\r\n```\r\n\r\n## LM Studio\r\n```bash\r\n# gpt-oss-20b\r\nlms get openai/gpt-oss-20b\r\n```\r\n\r\n# Download the model\r\n```bash\r\n# gpt-oss-20b\r\nhuggingface-cli download openai/gpt-oss-20b --include \"original/*\" --local-dir gpt-oss-20b/\npip install gpt-oss\npython -m gpt_oss.chat model/\r\n```\r\n\r\n# Reasoning levels\r\n* **Low:** Fast responses for general dialogue.\r\n* **Medium:** Balanced speed and detail.\r\n* **High:** Deep and detailed analysis.\r\n\r\n# Tool use\r\n* Web browsing (built-in tools)\r\n* Function calling with schemas\r\n* 
Agentic operations\r\n\r\n# Fine-tuning\r\nThe smaller model `gpt-oss-20b` can be fine-tuned on consumer hardware, larger `gpt-oss-120b` can be fine-tuned on a single H100 node.", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-Q4_K_M.gguf", "memory": 11600000000, "sha256": "c27536640e410032865dc68781d80a08b98f8db5e93575919af8ccc0568aeb4f", "backend": "llama-cpp" }, { "id": "hf.qwen.qwen3-4b-GGUF", "name": "qwen/qwen3-4b-GGUF", "description": "\r\n# Qwen3-4B-GGUF\r\n\r\n \"Chat\"\r\n<\/a>\r\n\r\n## Qwen3 Highlights\r\n\r\nQwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features:\r\n\r\n- **Uniquely support of seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within single model**, ensuring optimal performance across various scenarios.\r\n- **Significantly enhancement in its reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.\r\n- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.\r\n- **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks.\r\n- **Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction 
following** and **translation**.\r\n\r\n\r\n## Model Overview\r\n\r\n**Qwen3-4B** has the following features:\r\n- Type: Causal Language Models\r\n- Training Stage: Pretraining & Post-training\r\n- Number of Parameters: 4.0B\r\n- Number of Paramaters (Non-Embedding): 3.6B\r\n- Number of Layers: 36\r\n- Number of Attention Heads (GQA): 32 for Q and 8 for KV\r\n- Context Length: 32,768 natively and [131,072 tokens with YaRN](#processing-long-texts).\r\n\r\n- Quantization: q4_K_M, q5_0, q5_K_M, q6_K, q8_0\r\n\r\nFor more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https:\/\/qwenlm.github.io\/blog\/qwen3\/), [GitHub](https:\/\/github.com\/QwenLM\/Qwen3), and [Documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/).\r\n\r\n## Quickstart\r\n\r\n### llama.cpp\r\n\r\nCheck out our [llama.cpp documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/run_locally\/llama.cpp.html) for more usage guide.\r\n\r\nWe advise you to clone [`llama.cpp`](https:\/\/github.com\/ggerganov\/llama.cpp) and install it following the official guide. We follow the latest version of llama.cpp. \r\nIn the following demonstration, we assume that you are running commands under the repository `llama.cpp`.\r\n\r\n```shell\r\n.\/llama-cli -hf Qwen\/Qwen3-4B-GGUF:Q8_0 --jinja --color -ngl 99 -fa -sm row --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --presence-penalty 1.5 -c 40960 -n 32768 --no-context-shift\r\n```\r\n\r\n### ollama\r\n\r\nCheck out our [ollama documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/run_locally\/ollama.html) for more usage guide.\r\n\r\nYou can run Qwen3 with one command:\r\n\r\n```shell\r\nollama run hf.co\/Qwen\/Qwen3-4B-GGUF:Q8_0\r\n```\r\n\r\n## Switching Between Thinking and Non-Thinking Mode\r\n\r\nYou can add `\/think` and `\/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. 
The model will follow the most recent instruction in multi-turn conversations.\r\n\r\nHere is an example of multi-turn conversation:\r\n\r\n```\r\n> Who are you \/no_think\r\n\r\n\r\n\r\n<\/think>\r\n\r\nI am Qwen, a large-scale language model developed by Alibaba Cloud. [...]\r\n\r\n> How many 'r's are in 'strawberries'? \/think\r\n\r\n\r\nOkay, let's see. The user is asking how many times the letter 'r' appears in the word \"strawberries\". [...]\r\n<\/think>\r\n\r\nThe word strawberries contains 3 instances of the letter r. [...]\r\n```\r\n\r\n\r\n## Processing Long Texts\r\n\r\nQwen3 natively supports context lengths of up to 32,768 tokens. For conversations where the total length (including both input and output) significantly exceeds this limit, we recommend using RoPE scaling techniques to handle long texts effectively. We have validated the model's performance on context lengths of up to 131,072 tokens using the [YaRN](https:\/\/arxiv.org\/abs\/2309.00071) method.\r\n\r\nTo enable YARN in ``llama.cpp``:\r\n\r\n```shell\r\n.\/llama-cli ... -c 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768\r\n```\r\n\r\n> [!NOTE]\r\n> All the notable open-source frameworks implement static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.**\r\n> We advise adding the `rope_scaling` configuration only when processing long contexts is required. \r\n> It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 65,536 tokens, it would be better to set `factor` as 2.0. \r\n\r\n> [!TIP]\r\n> The endpoint provided by Alibaba Model Studio supports dynamic YaRN by default and no extra configuration is needed.\r\n\r\n\r\n## Best Practices\r\n\r\nTo achieve optimal performance, we recommend the following settings:\r\n\r\n1. 
**Sampling Parameters**:\r\n - For thinking mode (`enable_thinking=True`), use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, `MinP=0`, and `PresencePenalty=1.5`. **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions.\r\n - For non-thinking mode (`enable_thinking=False`), we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, `MinP=0`, and `PresencePenalty=1.5`.\r\n - **We recommend setting `presence_penalty` to 1.5 for quantized models to suppress repetitive outputs.** You can adjust the `presence_penalty` parameter between 0 and 2. A higher value may occasionally lead to language mixing and a slight reduction in model performance. \r\n\r\n2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 38,912 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.\r\n\r\n3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\r\n - **Math Problems**: Include \"Please reason step by step, and put your final answer within \\boxed{}.\" in the prompt.\r\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: \"Please show your choice in the `answer` field with only the choice letter, e.g., `\"answer\": \"C\"`.\"\r\n\r\n4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. 
However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.\r\n\r\n### Citation\r\n\r\nIf you find our work helpful, feel free to give us a cite.\r\n\r\n```\r\n@misc{qwen3technicalreport,\r\n title={Qwen3 Technical Report}, \r\n author={Qwen Team},\r\n year={2025},\r\n eprint={2505.09388},\r\n archivePrefix={arXiv},\r\n primaryClass={cs.CL},\r\n url={https:\/\/arxiv.org\/abs\/2505.09388}, \r\n}\r\n```", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/Qwen/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf", "sha256": "7485fe6f11af29433bc51cab58009521f205840f5b4ae3a32fa7f92e8534fdf5", "backend": "llama-cpp" }, { "id": "hf.unsloth.qwen3-4b-thinking-GGUF", "name": "qwen/Qwen3-4B-Thinking-2507-GGUF (Unsloth quantization)", "description": "---\nlibrary_name: transformers\nlicense: apache-2.0\nlicense_link: https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507/blob/main/LICENSE\nbase_model:\n- Qwen/Qwen3-4B-Thinking-2507\ntags:\n- qwen\n- qwen3\n- unsloth\n---\n
\n

\n See our collection for all versions of Qwen3 including GGUF, 4-bit & 16-bit formats.\n

\n

\n Learn to run Qwen3-2507 correctly - Read our Guide.\n

\n

\n Unsloth Dynamic 2.0 achieves superior accuracy & outperforms other leading quants.\n

\n
\n \n \n \n \n \n \n \n \n \n
\n

✨ Read our Qwen3-2507 Guide here!

\n
\n\n- Fine-tune Qwen3 (14B) for free using our Google [Colab notebook here](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n- Read our Blog about Qwen3 support: [unsloth.ai/blog/qwen3](https://unsloth.ai/blog/qwen3)\n- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).\n- Run & export your fine-tuned model to Ollama, llama.cpp or HF.\n\n| Unsloth supports | Free Notebooks | Performance | Memory use |\n|-----------------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------|\n| **Qwen3 (14B)** | [▶\uFE0F Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 70% less |\n| **GRPO with Qwen3 (8B)** | [▶\uFE0F Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 80% less |\n| **Llama-3.2 (3B)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |\n| **Llama-3.2 (11B vision)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |\n| **Qwen2.5 (7B)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |\n\n# Qwen3-4B-Thinking-2507\n\n \"Chat\"\n\n\n## Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of Qwen3-4B, improving both the **quality and depth** of reasoning. 
We are pleased to introduce **Qwen3-4B-Thinking-2507**, featuring the following key enhancements:\n\n- **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, and academic benchmarks that typically require human expertise.\n- **Markedly better general capabilities**, such as instruction following, tool usage, text generation, and alignment with human preferences.\n- **Enhanced 256K long-context understanding** capabilities.\n\n**NOTE**: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![image/jpeg](https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-2507/Qwen3-4B-Instruct.001.jpeg)\n\n## Model Overview\n\n**Qwen3-4B-Thinking-2507** has the following features:\n- Type: Causal Language Models\n- Training Stage: Pretraining & Post-training\n- Number of Parameters: 4.0B\n- Number of Paramaters (Non-Embedding): 3.6B\n- Number of Layers: 36\n- Number of Attention Heads (GQA): 32 for Q and 8 for KV\n- Context Length: **262,144 natively**. \n\n**NOTE: This model supports only thinking mode. Meanwhile, specifying `enable_thinking=True` is no longer required.**\n\nAdditionally, to enforce model thinking, the default chat template automatically includes ``. 
Therefore, it is normal for the model's output to contain only `` without an explicit opening `` tag.\n\nFor more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).\n\n\n## Performance\n\n\n| | Qwen3-30B-A3B Thinking | Qwen3-4B Thinking | Qwen3-4B-Thinking-2507 |\n|--- | --- | --- | --- |\n| **Knowledge** | | |\n| MMLU-Pro | **78.5** | 70.4 | 74.0 |\n| MMLU-Redux | **89.5** | 83.7 | 86.1 |\n| GPQA | **65.8** | 55.9 | **65.8** |\n| SuperGPQA | **51.8** | 42.7 | 47.8 |\n| **Reasoning** | | |\n| AIME25 | 70.9 | 65.6 | **81.3** |\n| HMMT25 | 49.8 | 42.1 | **55.5** |\n| LiveBench 20241125 | **74.3** | 63.6 | 71.8 |\n| **Coding** | | |\n| LiveCodeBench v6 (25.02-25.05) | **57.4** | 48.4 | 55.2 |\n| CFEval | **1940** | 1671 | 1852 |\n| OJBench | **20.7** | 16.1 | 17.9 |\n| **Alignment** | | |\n| IFEval | 86.5 | 81.9 | **87.4** |\n| Arena-Hard v2$ | **36.3** | 13.7 | 34.9 |\n| Creative Writing v3 | **79.1** | 61.1 | 75.6 |\n| WritingBench | 77.0 | 73.5 | **83.3** |\n| **Agent** | | |\n| BFCL-v3 | 69.1 | 65.9 | **71.2** |\n| TAU1-Retail | 61.7 | 33.9 | **66.1** |\n| TAU1-Airline | 32.0 | 32.0 | **48.0** |\n| TAU2-Retail | 34.2 | 38.6 | **53.5** |\n| TAU2-Airline | 36.0 | 28.0 | **58.0** |\n| TAU2-Telecom | 22.8 | 17.5 | **27.2** |\n| **Multilingualism** | | |\n| MultiIF | 72.2 | 66.3 | **77.3** |\n| MMLU-ProX | **73.1** | 61.0 | 64.2 |\n| INCLUDE | **71.9** | 61.8 | 64.4 |\n| PolyMATH | 46.1 | 40.0 | **46.2** |\n\n$ For reproducibility, we report the win rates evaluated by GPT-4.1.\n\n\\& For highly challenging tasks (including PolyMATH and all reasoning and coding tasks), we use an output length of 81,920 tokens. 
For all other tasks, we set the output length to 32,768.\n\n## Quickstart\n\nThe code of Qwen3 has been in the latest Hugging Face `transformers` and we advise you to use the latest version of `transformers`.\n\nWith `transformers<4.51.0`, you will encounter the following error:\n```\nKeyError: 'qwen3'\n```\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs. \n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = \"Qwen/Qwen3-4B-Thinking-2507\"\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=\"auto\",\n device_map=\"auto\"\n)\n\n# prepare the model input\nprompt = \"Give me a short introduction to large language model.\"\nmessages = [\n {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True,\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=32768\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() \n\n# parsing thinking content\ntry:\n # rindex finding 151668 ()\n index = len(output_ids) - output_ids[::-1].index(151668)\nexcept ValueError:\n index = 0\n\nthinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip(\"\\n\")\ncontent = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip(\"\\n\")\n\nprint(\"thinking content:\", thinking_content) # no opening tag\nprint(\"content:\", content)\n\n```\n\nFor deployment, you can use `sglang>=0.4.6.post1` or `vllm>=0.8.5` or to create an OpenAI-compatible API endpoint:\n- SGLang:\n ```shell\n python -m sglang.launch_server --model-path Qwen/Qwen3-4B-Thinking-2507 --context-length 262144 --reasoning-parser deepseek-r1\n ```\n- vLLM:\n 
```shell\n vllm serve Qwen/Qwen3-4B-Thinking-2507 --max-model-len 262144 --enable-reasoning --reasoning-parser deepseek_r1\n ```\n\n**Note: If you encounter out-of-memory (OOM) issues, you may consider reducing the context length to a smaller value. However, since the model may require longer token sequences for reasoning, we strongly recommend using a context length greater than 131,072 when possible.**\n\nFor local use, applications such as Ollama, LMStudio, MLX-LM, llama.cpp, and KTransformers have also supported Qwen3.\n\n## Agentic Use\n\nQwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.\n\nTo define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.\n```python\nfrom qwen_agent.agents import Assistant\n\n# Define LLM\n# Using OpenAI-compatible API endpoint. It is recommended to disable the reasoning and the tool call parsing\n# functionality of the deployment frameworks and let Qwen-Agent automate the related operations. 
For example, \n# `VLLM_USE_MODELSCOPE=true vllm serve Qwen/Qwen3-4B-Thinking-2507 --served-model-name Qwen3-4B-Thinking-2507 --max-model-len 262144`.\nllm_cfg = {\n 'model': 'Qwen3-4B-Thinking-2507',\n\n # Use a custom endpoint compatible with OpenAI API:\n 'model_server': 'http://localhost:8000/v1', # api_base without reasoning and tool call parsing\n 'api_key': 'EMPTY',\n 'generate_cfg': {\n 'thought_in_content': True,\n },\n}\n\n# Define Tools\ntools = [\n {'mcpServers': { # You can specify the MCP configuration file\n 'time': {\n 'command': 'uvx',\n 'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']\n },\n \"fetch\": {\n \"command\": \"uvx\",\n \"args\": [\"mcp-server-fetch\"]\n }\n }\n },\n 'code_interpreter', # Built-in tools\n]\n\n# Define Agent\nbot = Assistant(llm=llm_cfg, function_list=tools)\n\n# Streaming generation\nmessages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]\nfor responses in bot.run(messages=messages):\n pass\nprint(responses)\n```\n\n## Best Practices\n\nTo achieve optimal performance, we recommend the following settings:\n\n1. **Sampling Parameters**:\n - We suggest using `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`.\n - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.\n\n2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 81,920 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.\n\n3. 
**Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\n - **Math Problems**: Include \"Please reason step by step, and put your final answer within \\boxed{}.\" in the prompt.\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: \"Please show your choice in the `answer` field with only the choice letter, e.g., `\"answer\": \"C\"`.\"\n\n4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.\n\n\n### Citation\n\nIf you find our work helpful, feel free to give us a cite.\n\n```\n@misc{qwen3technicalreport,\n title={Qwen3 Technical Report}, \n author={Qwen Team},\n year={2025},\n eprint={2505.09388},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2505.09388}, \n}\n```", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF/resolve/main/Qwen3-4B-Thinking-2507-Q4_K_M.gguf", "backend": "llama-cpp", "sha256": "ddd52e18200baab281c5c46f70d544ce4d4fe4846eab1608f2fff48a64554212", "properties": { "jinja": "true" } }, { "id": "hf.ibm-granite.granite-4.0-tiny-GGUF", "name": "ibm-granite/granite-4.0-tiny-GGUF", "description": "# Granite-4.0-H-Tiny\n\n**Model Summary:**\nGranite-4.0-H-Tiny is a 7B parameter long-context instruct model finetuned from *Granite-4.0-H-Tiny-Base* using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. 
This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved *instruction following (IF)* and *tool-calling* capabilities, making them more effective in enterprise applications.\n\n- **Developers:** Granite Team, IBM\n- **HF Collection:** [Granite 4.0 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-40-language-models-6811a18b820ef362d9e5a82c)\n- **GitHub Repository:** [ibm-granite/granite-4.0-language-models](https://github.com/ibm-granite/granite-4.0-language-models)\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) \n- **Release Date**: October 2nd, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 4.0 models for languages beyond these languages.\n\n**Intended use:** \nThe model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, including business applications.\n\n*Capabilities*\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Fill-In-the-Middle (FIM) code completions\n\n\n \n**Generation:** \nThis is a simple example of how to use Granite-4.0-H-Tiny model.\n\nInstall the following libraries:\n\n```shell\npip install torch torchvision torchaudio\npip install accelerate\npip install transformers\n```\nThen, copy the snippet from the section that is relevant for your use case.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\ndevice = \"cuda\"\nmodel_path = \"ibm-granite/granite-4.0-h-tiny\"\ntokenizer = 
AutoTokenizer.from_pretrained(model_path)\n# drop device_map if running on CPU\nmodel = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)\nmodel.eval()\n# change input text as desired\nchat = [\n { \"role\": \"user\", \"content\": \"Please list one IBM Research laboratory located in the United States. You should only output its name and location.\" },\n]\nchat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)\n# tokenize the text\ninput_tokens = tokenizer(chat, return_tensors=\"pt\").to(device)\n# generate output tokens\noutput = model.generate(**input_tokens, \n max_new_tokens=100)\n# decode output tokens into text\noutput = tokenizer.batch_decode(output)\n# print output\nprint(output[0])\n```\n\nExpected output:\n```shell\n<|start_of_role|>user<|end_of_role|>Please list one IBM Research laboratory located in the United States. You should only output its name and location.<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Almaden Research Center, San Jose, California<|end_of_text|>\n```\n\n**Tool-calling:** \nGranite-4.0-H-Tiny comes with enhanced tool calling capabilities, enabling seamless integration with external functions and APIs. To define a list of tools please follow OpenAI's function [definition schema](https://platform.openai.com/docs/guides/function-calling?api-mode=responses#defining-functions). 
\n\nThis is an example of how to use Granite-4.0-H-Tiny model tool-calling ability:\n\n```python\ntools = [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"description\": \"Get the current weather for a specified city.\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"city\": {\n \"type\": \"string\",\n \"description\": \"Name of the city\"\n }\n },\n \"required\": [\"city\"]\n }\n }\n }\n]\n\n# change input text as desired\nchat = [\n { \"role\": \"user\", \"content\": \"What's the weather like in Boston right now?\" },\n]\nchat = tokenizer.apply_chat_template(chat, \\\n tokenize=False, \\\n tools=tools, \\\n add_generation_prompt=True)\n# tokenize the text\ninput_tokens = tokenizer(chat, return_tensors=\"pt\").to(device)\n# generate output tokens\noutput = model.generate(**input_tokens, \n max_new_tokens=100)\n# decode output tokens into text\noutput = tokenizer.batch_decode(output)\n# print output\nprint(output[0])\n```\n\nExpected output:\n```shell\n<|start_of_role|>system<|end_of_role|>You are a helpful assistant with access to the following tools. You may call one or more tools to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"description\": \"Get the current weather for a specified city.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"string\", \"description\": \"Name of the city\"}}, \"required\": [\"city\"]}}}\n\n\nFor each tool call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>What's the weather like in Boston right now?<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n{\"name\": \"get_current_weather\", \"arguments\": {\"city\": \"Boston\"}}\n<|end_of_text|>\n```\n\n\n\n**Evaluation Results:** \n\n\n\n\n \n \n \n \n \n \n \n \n\n \n\n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n\n \n \n \n \n \n \n\n\n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n\n \n \n\n \n \n \n \n \n \n\n\n \n \n \n \n \n \n\n
BenchmarksMetricMicro DenseH Micro DenseH Tiny MoEH Small MoE
\n General Tasks\n
MMLU5-shot65.9867.4368.6578.44
MMLU-Pro5-shot, CoT44.543.4844.9455.47
BBH3-shot, CoT72.4869.3666.3481.62
AGI EVAL0-shot, CoT64.295962.1570.63
GPQA0-shot, CoT30.1432.1532.5940.63
\n Alignment Tasks\n
AlpacaEval 2.029.4931.4930.6142.48
IFEvalInstruct, Strict85.586.9484.7889.87
IFEvalPrompt, Strict79.1281.7178.185.22
IFEvalAverage82.3184.3281.4487.55
ArenaHard25.8436.1535.7546.48
\n Math Tasks\n
GSM8K8-shot85.4581.3584.6987.27
GSM8K Symbolic8-shot79.8277.581.187.38
Minerva Math0-shot, CoT62.0666.4469.6474
DeepMind Math0-shot, CoT44.5643.8349.9259.33
\n Code Tasks\n
HumanEvalpass@180818388
HumanEval+pass@172757683
MBPPpass@172738084
MBPP+pass@164646971
CRUXEval-Opass@141.541.2539.6350.25
BigCodeBenchpass@139.2137.941.0646.23
\n Tool Calling Tasks\n
BFCL v359.9857.5657.6564.69
\n Multilingual Tasks\n
MULTIPLEpass@149.2149.4655.8357.37
MMMLU5-shot55.1455.1961.8769.69
INCLUDE5-shot51.6250.5153.1263.97
MGSM8-shot28.5644.4845.3638.72
\n Safety\n
SALAD-Bench97.0696.2897.7797.3
AttaQ86.0584.4486.6186.64
\n\n\n\n \n\n \n \n \n \n \n\n\n\n \n \n \n\n\n \n \n\n \n \n\n\n \n \n \n\n\n
Multilingual Benchmarks and the included languages:
Benchmarks# LangsLanguages
MMMLU11ar, de, en, es, fr, ja, ko, pt, zh, bn, hi
INCLUDE14hi, bn, ta, te, ar, de, es, fr, it, ja, ko, nl, pt, zh
MGSM5en, es, fr, ja, zh
\n\n**Model Architecture:** \nGranite-4.0-H-Tiny baseline is built on a decoder-only MoE transformer architecture. Core components of this architecture are: GQA, Mamba2, MoEs with shared experts, SwiGLU activation, RMSNorm, and shared input/output embeddings.\n\n\n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ModelMicro DenseH Micro DenseH Tiny MoEH Small MoE
Embedding size2560204815364096
Number of layers40 attention4 attention / 36 Mamba24 attention / 36 Mamba24 attention / 36 Mamba2
Attention head size6464128128
Number of attention heads40321232
Number of KV heads8848
Mamba2 state size-128128128
Number of Mamba2 heads-6448128
MLP / Shared expert hidden size8192819210241536
Num. Experts--6472
Num. active Experts--610
Expert hidden size--512768
MLP activationSwiGLUSwiGLUSwiGLUSwiGLU
Sequence length128K128K128K128K
Position embeddingRoPENoPENoPENoPE
# Parameters3B3B7B32B
# Active parameters3B3B1B9B
\n\n**Training Data:** \nOverall, our SFT data is largely comprised of three key sources: (1) publicly available datasets with permissive license, (2) internal synthetic data targeting specific capabilities, and (3) a select set of human-curated data.\n\n**Infrastructure:**\nWe trained the Granite 4.0 Language Models utilizing an NVIDIA GB200 NVL72 cluster hosted in CoreWeave. Intra-rack communication occurs via the 72-GPU NVLink domain, and a non-blocking, full Fat-Tree NDR 400 Gb/s InfiniBand network provides inter-rack communication. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.\n\n**Ethical Considerations and Limitations:** \nGranite 4.0 Instruction Models are primarily finetuned using instruction-response pairs mostly in English, but also multilingual data covering multiple languages. Although this model can handle multilingual dialog use cases, its performance might not be similar to English tasks. In such case, introducing a small number of examples (few-shot) can help the model in generating more accurate outputs. While this model has been aligned by keeping safety in consideration, the model may in some cases produce inaccurate, biased, or unsafe responses to user prompts. 
So we urge the community to use this model with proper safety testing and tuning tailored for their specific tasks.\n\n**Resources**\n- ⭐\uFE0F Learn about the latest updates with Granite: https://www.ibm.com/granite\n- \uD83D\uDCC4 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/\n- \uD83D\uDCA1 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources\n\n", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ibm-granite/granite-4.0-h-tiny-GGUF/resolve/3971ea11968c34d4e4dbee55cfb55b9cba134b21/granite-4.0-h-tiny-Q4_K_M.gguf", "memory": 4224733676, "properties": { "jinja": "true" }, "sha256": "491ba81786c46a345a5da9a60cdb9f9a3056960c8411dd857153c194b1f91313", "backend": "llama-cpp" }, { "id": "hf.ibm-granite.granite-4.0-micro-GGUF", "name": "ibm-granite/granite-4.0-micro-GGUF", "description": "# Granite-4.0-Micro\n\n**Model Summary:**\nGranite-4.0-Micro is a compact language model from the Granite 4.0 family designed for efficient deployment with strong performance. 
This Q4_K_M quantized GGUF version provides a good balance between model size and quality, making it suitable for resource-constrained environments while maintaining the core capabilities of the Granite 4.0 series.\n\n- **Developers:** Granite Team, IBM\n- **HF Collection:** [Granite 4.0 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-40-language-models-6811a18b820ef362d9e5a82c)\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Capabilities:**\n* General instruction following\n* Question-answering\n* Text generation\n* Conversational AI\n* Multilingual dialog use cases\n\n**Intended Use:**\nThe model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, particularly in scenarios where model size and inference speed are important considerations.", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ibm-granite/granite-4.0-micro-GGUF/resolve/397e2dcbd97dcdfa016934bffed65cf5df3ca55f/granite-4.0-micro-Q4_K_M.gguf", "memory": 2100000000, "properties": { "jinja": "true" }, "sha256": "6c02683809a8dc4eb05c78d44bc63bcd707703b078998fa58829c858ab337bb0", "backend": "llama-cpp" }, { "id": "hf.ibm-granite.granite-3.3-8b-instruct-GGUF", "name": "ibm-granite/granite-3.3-8b-instruct-GGUF", "description": "# Granite-3.3-8B-Instruct\n\n**Model Summary:**\nGranite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. Built on top of Granite-3.3-8B-Base, the model delivers significant gains on benchmarks for measuring generic performance including AlpacaEval-2.0 and Arena-Hard, and improvements in mathematics, coding, and instruction following. It supprts structured reasoning through \\\\<\\/think\\> and \\\\<\\/response\\> tags, providing clear separation between internal thoughts and final outputs. 
The model has been trained on a carefully balanced combination of permissively licensed data and curated synthetic tasks.\n\n- **Developers:** Granite Team, IBM\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)\n- **Release Date**: April 16th, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. However, users may finetune this Granite model for languages beyond these 12 languages.\n\n**Intended Use:** \nThis model is designed to handle general instruction-following tasks and can be integrated into AI assistants across various domains, including business applications.\n\n**Capabilities**\n* Thinking\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Fill-in-the-middle\n* Long-context tasks including long document/meeting summarization, long document QA, etc.\n\n\n**Generation:** \nThis is a simple example of how to use Granite-3.3-8B-Instruct model.\n\nInstall the following libraries:\n\n```shell\npip install torch torchvision torchaudio\npip install accelerate\npip install transformers\n```\nThen, copy the snippet from the section that is relevant for your use case.\n\n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, set_seed\nimport torch\n\nmodel_path=\"ibm-granite/granite-3.3-8b-instruct\"\ndevice=\"cuda\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_path,\n device_map=device,\n torch_dtype=torch.bfloat16,\n )\ntokenizer = AutoTokenizer.from_pretrained(\n model_path\n)\n\nconv = [{\"role\": \"user\", \"content\":\"Redesign a common household item to make it more sustainable and user-friendly. 
Explain the changes and their benefits.\"}]\n\ninput_ids = tokenizer.apply_chat_template(conv, return_tensors=\"pt\", thinking=True, return_dict=True, add_generation_prompt=True).to(device)\n\nset_seed(42)\noutput = model.generate(\n **input_ids,\n max_new_tokens=8192,\n)\n\nprediction = tokenizer.decode(output[0, input_ids[\"input_ids\"].shape[1]:], skip_special_tokens=True)\nprint(prediction)\n```\n\n**Example Outputs**\n- thinking=True\n```md\nTo address this prompt, I'll focus on redesigning a common household item: the plastic water bottle. The goal is to make it more sustainable and user-friendly. Here are the proposed changes and their benefits:\n\n1. **Material**: Replace plastic with biodegradable or recyclable materials like glass or plant-based plastics (PLA).\n2. **Design**: Introduce a modular design for easy refilling and cleaning.\n3. **Functionality**: Incorporate a built-in water filter for purification.\n4. **Durability**: Enhance the bottle's lifespan through robust construction.\n5. **User-friendliness**: Simplify the cap mechanism for easy opening and closing.\n\nThese changes would significantly reduce plastic waste, promote recycling, ensure access to clean drinking water, and enhance the overall user experience.\n\nNow, I will detail each change and its benefits in a structured format.\n\n### Redesign of the Plastic Water Bottle for Sustainability and User-Friendliness\n\n#### 1. Material: Biodegradable or Recyclable Alternatives\n\n**Change**: Transition from traditional PET plastic to glass or plant-based plastics (Polylactic Acid - PLA).\n\n**Benefits**:\n- **Sustainability**: Glass and PLA are biodegradable or highly recyclable, drastically reducing plastic pollution.\n- **Health**: Unlike PET, these materials do not leach chemicals into the water, ensuring safer consumption.\n\n#### 2. 
Modular Design for Easy Refilling and Cleaning\n\n**Change**: Implement a detachable, interchangeable inner liner within a reusable outer casing.\n\n**Benefits**:\n- **User-Friendliness**: Users can easily remove the inner liner for thorough cleaning, reducing the risk of bacterial growth.\n- **Sustainability**: The outer casing remains intact, encouraging reuse, while the inner liner can be replaced or recycled when worn out.\n\n#### 3. Built-in Water Filter\n\n**Change**: Integrate a removable, replaceable water filter into the bottle cap.\n\n**Benefits**:\n- **Accessibility**: Provides clean drinking water directly from any source (tap, river, etc.), eliminating the need for additional filtration devices.\n- **Health**: Removes contaminants, bacteria, and impurities, ensuring safe hydration.\n- **Sustainability**: Reduces the use of single-use plastic water bottles by offering a purification solution within a reusable container.\n\n#### 4. Enhanced Durability\n\n**Change**: Use high-quality, durable materials and robust construction techniques for both the outer casing and inner liner.\n\n**Benefits**:\n- **Longevity**: A more robust design increases the bottle's lifespan, reducing the frequency of replacements.\n- **Cost-effectiveness**: Although the initial cost might be higher, the long-term savings from reduced replacements make it economical.\n- **Sustainability**: Less frequent disposal means lower environmental impact.\n\n#### 5. 
Simplified Cap Mechanism\n\n**Change**: Adopt a one-touch, easy-to-open and close cap design, possibly inspired by sports bottle caps.\n\n**Benefits**:\n- **User-Friendliness**: Especially beneficial for children, elderly, or those with limited hand dexterity.\n- **Convenience**: Quick access to water encourages more frequent hydration, promoting healthier habits.\n\n### Conclusion\n\nThis redesigned water bottle addresses key sustainability concerns by minimizing plastic use and waste, while also enhancing user-friendliness through improved functionality, durability, and accessibility. By integrating biodegradable materials, a modular design, a built-in filter, robust construction, and a simple cap mechanism, the bottle not only reduces environmental footprint but also offers a superior user experience.\n```\n- thinking=False\n```md\nItem for Redesign: Plastic Bottle (e.g., water or beverage bottle)\n\nRedesign Concept: Eco-Friendly, Modular, and Multi-Functional Reusable Bottle\n\nChanges:\n\n1. Material: Replace the conventional plastic with food-grade, BPA-free, and recyclable materials such as stainless steel or glass. These materials are durable, non-toxic, and have a longer lifespan, reducing waste.\n\n2. Modular Design: Introduce a modular design that allows users to customize their bottle according to their needs. This could include interchangeable caps, mouthpiece sizes, and carrying loops. Users can purchase additional modules as needed, promoting a more sustainable approach by reducing the need to buy an entirely new bottle for different purposes.\n\n3. Integrated Filter: Incorporate a built-in, washable, and reusable filter that can remove impurities and improve the taste of water. This eliminates the need for single-use disposable filters or bottled water, further reducing plastic waste.\n\n4. Smart Cap: Develop a smart cap with a built-in digital display and temperature sensor. 
This feature allows users to track their daily water intake, set hydration goals, and monitor the temperature of their beverage. The smart cap can be synced with a mobile app for additional functionality, such as reminders and progress tracking.\n\n5. Easy-to-Clean Design: Ensure the bottle has a wide mouth and smooth interior surfaces for easy cleaning. Include a brush for hard-to-reach areas, making maintenance simple and encouraging regular use.\n\n6. Collapsible Structure: Implement a collapsible design that reduces the bottle's volume when not in use, making it more portable and convenient for storage.\n\nBenefits:\n\n1. Sustainability: By using recyclable materials and reducing plastic waste, this redesigned bottle significantly contributes to a more sustainable lifestyle. The modular design and reusable filter also minimize single-use plastic consumption.\n\n2. User-Friendly: The smart cap, easy-to-clean design, and collapsible structure make the bottle convenient and user-friendly. Users can customize their bottle to suit their needs, ensuring a better overall experience.\n\n3. Healthier Option: Using food-grade, BPA-free materials and an integrated filter ensures that the beverages consumed are free from harmful chemicals and impurities, promoting a healthier lifestyle.\n\n4. Cost-Effective: Although the initial investment might be higher, the long-term savings from reduced purchases of single-use plastic bottles and disposable filters make this reusable bottle a cost-effective choice.\n\n5. 
Encourages Hydration: The smart cap's features, such as hydration tracking and temperature monitoring, can motivate users to stay hydrated and develop healthier habits.\n\nBy redesigning a common household item like the plastic bottle, we can create a more sustainable, user-friendly, and health-conscious alternative that benefits both individuals and the environment.\n```\n\n**Evaluation Results:**\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Comparison with different models over various benchmarks1. Scores of AlpacaEval-2.0 and Arena-Hard are calculated with thinking=True
ModelsArena-HardAlpacaEval-2.0MMLUPopQATruthfulQABigBenchHard2DROP3GSM8KHumanEvalHumanEval+IFEvalAttaQ
Granite-3.1-2B-Instruct23.327.1757.1120.5559.7961.8220.9967.5579.4575.2663.5984.7
Granite-3.2-2B-Instruct24.8634.5157.1820.5659.861.3923.8467.0280.1373.3961.5583.23
Granite-3.3-2B-Instruct 28.86 43.45 55.88 18.4 58.97 63.91 44.33 72.48 80.51 75.68 65.8 87.47
Llama-3.1-8B-Instruct36.4327.2269.1528.7952.7973.4371.2383.2485.3280.1579.1083.43
DeepSeek-R1-Distill-Llama-8B17.1721.8545.8013.2547.4367.3949.7372.1867.5462.9166.5042.87
Qwen-2.5-7B-Instruct25.4430.3474.3018.1263.0669.1964.0684.4693.3589.9174.9081.90
DeepSeek-R1-Distill-Qwen-7B10.3615.3550.729.9447.1467.3851.7878.4779.8978.4359.1042.45
Granite-3.1-8B-Instruct37.5830.3466.7728.765.8469.8758.5779.1589.6385.7973.2085.73
Granite-3.2-8B-Instruct55.2561.1966.7928.0466.9271.8658.2981.6589.3585.7274.3184.7
Granite-3.3-8B-Instruct 57.56 62.68 65.54 26.17 66.86 69.13 59.36 80.89 89.73 86.09 74.82 88.5
\n\n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Math Benchmarks
ModelsAIME24MATH-500
Granite-3.1-2B-Instruct 0.89 35.07
Granite-3.2-2B-Instruct 0.89 35.54
Granite-3.3-2B-Instruct 3.28 58.09
Granite-3.1-8B-Instruct 1.97 48.73
Granite-3.2-8B-Instruct 2.43 52.8
Granite-3.3-8B-Instruct 8.12 69.02
\n \n**Training Data:** \nOverall, our training data is largely comprised of two key sources: (1) publicly available datasets with permissive license, (2) internal synthetically generated data targeted to enhance reasoning capabilites. \n\n\n**Infrastructure:**\nWe train Granite-3.3-8B-Instruct using IBM's super computing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.\n\n**Ethical Considerations and Limitations:** \nGranite-3.3-8B-Instruct builds upon Granite-3.3-8B-Base, leveraging both permissively licensed open-source and select proprietary data for enhanced performance. Since it inherits its foundation from the previous model, all ethical considerations and limitations applicable to [Granite-3.3-8B-Base](https://huggingface.co/ibm-granite/granite-3.3-8b-base) remain relevant.\n\n\n**Resources**\n- ⭐\uFE0F Learn about the latest updates with Granite: https://www.ibm.com/granite\n- \uD83D\uDCC4 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/\n- \uD83D\uDCA1 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources\n\n

[1] Evaluated using OLMES (except AttaQ and Arena-Hard scores)

\n

[2] Added regex for more efficient answer extraction.

\n

[3] Modified the implementation to handle some of the issues mentioned here

\n", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf", "memory": 4939212390, "properties": { "jinja": "true" }, "sha256": "77bcee066a76dcdd10d0d123c87e32c8ec2c74e31b6ffd87ebee49c9ac215dca", "backend": "llama-cpp" }, { "id": "hf.ibm-research.granite-3.2-8b-instruct-GGUF", "name": "ibm-research/granite-3.2-8b-instruct-GGUF", "description": "# Granite-3.2-8B-Instruct-GGUF\n\n**Model Summary:**\nGranite-3.2-8B-Instruct is an 8-billion-parameter, long-context AI model fine-tuned for thinking capabilities. Built on top of [Granite-3.1-8B-Instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct), it has been trained using a mix of permissively licensed open-source datasets and internally generated synthetic data designed for reasoning tasks. The model allows controllability of its thinking capability, ensuring it is applied only when required.\n\n- **Developers:** Granite Team, IBM\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)\n- **Release Date**: February 26th, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. 
However, users may finetune this Granite model for languages beyond these 12 languages.\n\n**Intended Use:** \nThis model is designed to handle general instruction-following tasks and can be integrated into AI assistants across various domains, including business applications.\n\n**Capabilities**\n* **Thinking**\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Long-context tasks including long document/meeting summarization, long document QA, etc.", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ibm-research/granite-3.2-8b-instruct-GGUF/resolve/main/granite-3.2-8b-instruct-Q4_K_M.gguf", "memory": 4939212390, "properties": { "chatFormat": "openchat" }, "sha256": "363f0bbc3200b9c9b0ab87efe237d77b1e05bb929d5d7e4b57c1447c911223e8", "backend": "llama-cpp" }, { "id": "hf.ibm-granite.granite-8b-code-instruct", "name": "ibm-granite/granite-8b-code-instruct-GGUF", "description": "![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png)\n\n# ibm-granite/granite-8b-code-instruct-GGUF\nThis is the Q4_K_M converted version of the original [`ibm-granite/granite-8b-code-instruct`](https://huggingface.co/ibm-granite/granite-8b-code-instruct).\nRefer to the [original model card](https://huggingface.co/ibm-granite/granite-8b-code-instruct) for more details.\n\n## Use with llama.cpp\n```shell\ngit clone https://github.com/ggerganov/llama.cpp\ncd llama.cpp\n\n# install\nmake\n\n# run generation\n./main -m granite-8b-code-instruct-GGUF/granite-8b-code-instruct.Q4_K_M.gguf -n 128 -p \"def generate_random(x: int):\" --color\n```", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ibm-granite/granite-8b-code-instruct-GGUF/resolve/main/granite-8b-code-instruct.Q4_K_M.gguf", "memory": 5347234284, 
"properties": { "chatFormat": "openchat" }, "sha256": "bc8804cb43c4e1e82e2188658569b147587f83a89640600a64d5f7d7de2565b4", "backend": "llama-cpp" }, { "id": "hf.ggerganov.whisper.cpp", "name": "ggerganov/whisper.cpp", "description": "# OpenAI's Whisper models converted to ggml format\n\n[Available models](https://huggingface.co/ggerganov/whisper.cpp/tree/main)\n", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", "memory": 487010000, "sha256": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b", "backend": "whisper-cpp" }, { "id": "hf.facebook.detr-resnet-101", "name": "facebook/detr-resnet-101", "description": "# DETR (End-to-End Object Detection) model with ResNet-101 backbone\n\nDEtection TRansformer (DETR) model trained end-to-end on COCO 2017 object detection (118k annotated images). It was introduced in the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Carion et al. and first released in [this repository](https://github.com/facebookresearch/detr). \n\nDisclaimer: The team releasing DETR did not write a model card for this model so this model card has been written by the Hugging Face team.\n\n## Model description\n\nThe DETR model is an encoder-decoder transformer with a convolutional backbone. Two heads are added on top of the decoder outputs in order to perform object detection: a linear layer for the class labels and a MLP (multi-layer perceptron) for the bounding boxes. The model uses so-called object queries to detect objects in an image. Each object query looks for a particular object in the image. For COCO, the number of object queries is set to 100. 
\n\nThe model is trained using a \"bipartite matching loss\": one compares the predicted classes + bounding boxes of each of the N = 100 object queries to the ground truth annotations, padded up to the same length N (so if an image only contains 4 objects, 96 annotations will just have a \"no object\" as class and \"no bounding box\" as bounding box). The Hungarian matching algorithm is used to create an optimal one-to-one mapping between each of the N queries and each of the N annotations. Next, standard cross-entropy (for the classes) and a linear combination of the L1 and generalized IoU loss (for the bounding boxes) are used to optimize the parameters of the model.\n\n![model image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/detr_architecture.png)\n\n## Intended uses & limitations\n\nYou can use the raw model for object detection. See the [model hub](https://huggingface.co/models?search=facebook/detr) to look for all available DETR models.", "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/facebook/detr-resnet-101/resolve/no_timm/pytorch_model.bin", "memory": 242980000, "properties": { "name": "facebook/detr-resnet-101" }, "sha256": "893ae2442b36b2e8e1134ccbf8c0d9bd670648d0964509202ab30c9cbb3d2114", "backend": "none" }, { "id": "hf.microsoft.Phi-4-mini-reasoning", "name": "microsoft/Phi-4-mini-reasoning (Unsloth quantization)", "description": "## Model Summary\n \nPhi-4-mini-reasoning is a lightweight open model built upon synthetic data with a focus on high-quality, reasoning dense data further finetuned for more advanced math reasoning capabilities. \nThe model belongs to the Phi-4 model family and supports 128K token context length. 
\n \n\uD83D\uDCF0 [Phi-4-mini-reasoning Blog](https://aka.ms/phi4-mini-reasoning/blog), and [Developer Article](https://techcommunity.microsoft.com/blog/azuredevcommunityblog/make-phi-4-mini-reasoning-more-powerful-with-industry-reasoning-on-edge-devices/4409764)
\n\uD83D\uDCD6 [Phi-4-mini-reasoning Technical Report](https://aka.ms/phi4-mini-reasoning/techreport) | [HF paper](https://huggingface.co/papers/2504.21233)
\n\uD83D\uDC69\u200D\uD83C\uDF73 [Phi Cookbook](https://github.com/microsoft/PhiCookBook)
\n\uD83C\uDFE1 [Phi Portal](https://azure.microsoft.com/en-us/products/phi)
\n\uD83D\uDDA5\uFE0F Try It [Azure](https://aka.ms/phi4-mini-reasoning/azure)
\n \n \n\uD83C\uDF89**Phi-4 models**: [[Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning)] | [[multimodal-instruct](https://huggingface.co/microsoft/Phi-4-multimodal-instruct) | [onnx](https://huggingface.co/microsoft/Phi-4-multimodal-instruct-onnx)]; \n[[mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [onnx](https://huggingface.co/microsoft/Phi-4-mini-instruct-onnx)]\n\n## Intended Uses\n \n### Primary Use Cases\n\nPhi-4-mini-reasoning is designed for multi-step, logic-intensive mathematical problem-solving tasks under memory/compute constrained environments and latency bound scenarios.\nSome of the use cases include formal proof generation, symbolic computation, advanced word problems, and a wide range of mathematical reasoning scenarios. \nThese models excel at maintaining context across steps, applying structured logic, and delivering accurate, reliable solutions in domains that require deep analytical thinking.\n\n### Use Case Considerations\n \nThis model is designed and tested for math reasoning only. It is not specifically designed or evaluated for all downstream purposes. \nDevelopers should consider common limitations of language models, as well as performance difference across languages, as they select use cases, and evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high-risk scenarios. \nDevelopers should be aware of and adhere to applicable laws or regulations (including but not limited to privacy, trade compliance laws, etc.) that are relevant to their use case. \n \n***Nothing contained in this Model Card should be interpreted as or deemed a restriction or modification to the license the model is released under.***\n \n## Release Notes\n \nThis release of Phi-4-mini-reasoning addresses user feedback and market demand for a compact reasoning model. 
\nIt is a compact transformer-based language model optimized for mathematical reasoning, built to deliver high-quality, step-by-step problem solving in environments where computing or latency is constrained.\nThe model is fine-tuned with synthetic math data from a more capable model (much larger, smarter, more accurate, and better at following instructions), which has resulted in enhanced reasoning performance. \nPhi-4-mini-reasoning balances reasoning ability with efficiency, making it potentially suitable for educational applications, embedded tutoring, and lightweight deployment on edge or mobile systems.\nIf a critical issue is identified with Phi-4-mini-reasoning, it should be promptly reported through the MSRC Researcher Portal or secure@microsoft.com \n \n### Model Quality\n \nTo understand the capabilities, the 3.8B parameters Phi-4-mini-reasoning model was compared with a set of models over a variety of reasoning benchmarks. \nA high-level overview of the model quality is as follows:\n\n| Model | AIME | MATH-500 | GPQA Diamond |\n|------------------------------------|-------|----------|--------------|\n| o1-mini* | 63.6 | 90.0 | 60.0 |\n| DeepSeek-R1-Distill-Qwen-7B | 53.3 | 91.4 | 49.5 |\n| DeepSeek-R1-Distill-Llama-8B | 43.3 | 86.9 | 47.3 |\n| Bespoke-Stratos-7B* | 20.0 | 82.0 | 37.8 |\n| OpenThinker-7B* | 31.3 | 83.0 | 42.4 |\n| Llama-3.2-3B-Instruct | 6.7 | 44.4 | 25.3 |\n| Phi-4-Mini (base model, 3.8B) | 10.0 | 71.8 | 36.9 |\n|**Phi-4-mini-reasoning (3.8B)** | **57.5** | **94.6** | **52.0** |\n \nOverall, the model with only 3.8B-param achieves a similar level of multilingual language understanding and reasoning ability as much larger models.\nHowever, it is still fundamentally limited by its size for certain tasks. The model simply does not have the capacity to store too much factual knowledge, therefore, users may experience factual incorrectness. 
However, it may be possible to resolve such weakness by augmenting Phi-4 with a search engine, particularly when using the model under RAG settings.\n \n## Usage\n \n### Tokenizer\n \nPhi-4-mini-reasoning supports a vocabulary size of up to `200064` tokens. The [tokenizer files](https://huggingface.co/microsoft/Phi-4-mini-reasoning/blob/main/added_tokens.json) already provide placeholder tokens that can be used for downstream fine-tuning, but they can also be extended up to the model's vocabulary size.\n \n### Input Formats\n \nGiven the nature of the training data, the Phi-4-mini-instruct\nmodel is best suited for prompts using specific formats.\nBelow are the two primary formats:\n \n#### Chat format\n \nThis format is used for general conversation and instructions:\n \n```yaml\n<|system|>Your name is Phi, an AI math expert developed by Microsoft.<|end|><|user|>How to solve 3*x^2+4*x+5=1?<|end|><|assistant|>\n```\n### Inference with transformers\n\nPhi-4-mini-reasoning has been integrated in the `4.51.3` version of `transformers`. The current `transformers` version can be verified with: `pip list | grep transformers`.\nPython 3.8 and 3.10 will work best. 
\nList of required packages:\n\n```\nflash_attn==2.7.4.post1\ntorch==2.5.1\ntransformers==4.51.3\naccelerate==1.3.0\n```\n \nPhi-4-mini-reasoning is also available in [Azure AI Studio](https://aka.ms/phi-4-mini-reasoning/azure)\n\n#### Example\n \nAfter obtaining the Phi-4-mini-instruct model checkpoints, users can use this sample code for inference.\n \n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\ntorch.random.manual_seed(0)\n\nmodel_id = \"microsoft/Phi-4-mini-reasoning\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"cuda\",\n torch_dtype=\"auto\",\n trust_remote_code=True,\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\nmessages = [{\n \"role\": \"user\",\n \"content\": \"How to solve 3*x^2+4*x+5=1?\"\n}] \ninputs = tokenizer.apply_chat_template(\n messages,\n add_generation_prompt=True,\n return_dict=True,\n return_tensors=\"pt\",\n)\n\noutputs = model.generate(\n **inputs.to(model.device),\n max_new_tokens=32768,\n temperature=0.8,\n top_p=0.95,\n do_sample=True,\n)\noutputs = tokenizer.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])\n\nprint(outputs[0])\n```\n \n## Training\n \n### Model\n \n+ **Architecture:** Phi-4-mini-reasoning shares the same architecture as Phi-4-Mini, which has 3.8B parameters and is a dense decoder-only Transformer model. When compared with Phi-3.5-Mini, the major changes with Phi-4-Mini are 200K vocabulary, grouped-query attention, and shared input and output embedding.
\n+ **Inputs:** Text. It is best suited for prompts using the chat format.
\n+ **Context length:** 128K tokens
\n+ **GPUs:** 128 H100-80G
\n+ **Training time:** 2 days
\n+ **Training data:** 150B tokens
\n+ **Outputs:** Generated text
\n+ **Dates:** Trained in February 2024
\n+ **Status:** This is a static model trained on offline datasets with the cutoff date of February 2025 for publicly available data.
\n+ **Supported languages:** English
\n+ **Release date:** April 2025
\n \n### Training Datasets\n \nThe training data for Phi-4-mini-reasoning consists exclusively of synthetic mathematical content generated by a stronger and more advanced reasoning model, Deepseek-R1. \nThe objective is to distill knowledge from this model. This synthetic dataset comprises over one million diverse math problems spanning multiple levels of difficulty (from middle school to Ph.D. level).\nFor each problem in the synthetic dataset, eight distinct solutions (rollouts) were sampled, and only those verified as correct were retained, resulting in approximately 30 billion tokens of math content.\nThe dataset integrates three primary components: \n1) a curated selection of high-quality, publicly available math questions and a part of the SFT(Supervised Fine-Tuning) data that was used to train the base Phi-4-Mini model;\n2) an extensive collection of synthetic math data generated by the Deepseek-R1 model, designed specifically for high-quality supervised fine-tuning and model distillation; and\n3) a balanced set of correct and incorrect answers used to construct preference data aimed at enhancing Phi-4-mini-reasoning's reasoning capabilities by learning more effective reasoning trajectories\n\n## Software\n* [PyTorch](https://github.com/pytorch/pytorch)\n* [Transformers](https://github.com/huggingface/transformers)\n* [Flash-Attention](https://github.com/HazyResearch/flash-attention)\n \n## Hardware\nNote that by default, the Phi-4-mini-reasoning model uses flash attention, which requires certain types of GPU hardware to run. We have tested on the following GPU types:\n* NVIDIA A100\n* NVIDIA H100\n \nIf you want to run the model on:\n* NVIDIA V100 or earlier generation GPUs: call AutoModelForCausalLM.from_pretrained() with attn_implementation=\"eager\"\n\n## Safety Evaluation and Red-Teaming\n \nThe Phi-4 family of models has adopted a robust safety post-training approach. 
This approach leverages a variety of both open-source and in-house generated datasets. The overall technique employed to do the safety alignment is a combination of SFT, DPO (Direct Preference Optimization), and RLHF (Reinforcement Learning from Human Feedback) approaches by utilizing human-labeled and synthetic English-language datasets, including publicly available datasets focusing on helpfulness and harmlessness, as well as various questions and answers targeted to multiple safety categories. \n\nPhi-4-Mini-Reasoning was developed in accordance with Microsoft's responsible AI principles. Potential safety risks in the model’s responses were assessed using the Azure AI Foundry’s Risk and Safety Evaluation framework, focusing on harmful content, direct jailbreak, and model groundedness. The Phi-4-Mini-Reasoning Model Card contains additional information about our approach to safety and responsible AI considerations that developers should be aware of when using this model.\n\n## Responsible AI Considerations\n \nLike other language models, the Phi family of models can potentially behave in ways that are unfair, unreliable, or offensive. Some of the limiting behaviors to be aware of include:\n \n+ Quality of Service: The Phi models are trained primarily on English text and some additional multilingual text. Languages other than English will experience worse performance as well as performance disparities across non-English. English language varieties with less representation in the training data might experience worse performance than standard American English. \n+ Multilingual performance and safety gaps: We believe it is important to make language models more widely available across different languages, but the Phi 4 models still exhibit challenges common across multilingual releases. 
As with any deployment of LLMs, developers will be better positioned to test for performance or safety gaps for their linguistic and cultural context and customize the model with additional fine-tuning and appropriate safeguards.\n+ Representation of Harms & Perpetuation of Stereotypes: These models can over- or under-represent groups of people, erase representation of some groups, or reinforce demeaning or negative stereotypes. Despite safety post-training, these limitations may still be present due to differing levels of representation of different groups, cultural contexts, or prevalence of examples of negative stereotypes in training data that reflect real-world patterns and societal biases.\n+ Inappropriate or Offensive Content: These models may produce other types of inappropriate or offensive content, which may make it inappropriate to deploy for sensitive contexts without additional mitigations that are specific to the case.\n+ Information Reliability: Language models can generate nonsensical content or fabricate content that might sound reasonable but is inaccurate or outdated. \n+\tElection Information Reliability : The model has an elevated defect rate when responding to election-critical queries, which may result in incorrect or unauthoritative election critical information being presented. We are working to improve the model's performance in this area. Users should verify information related to elections with the election authority in their region.\n+ Limited Scope for Code: The majority of Phi 4 training data is based in Python and uses common packages such as \"typing, math, random, collections, datetime, itertools\". 
If the model generates Python scripts that utilize other packages or scripts in other languages, it is strongly recommended that users manually verify all API uses.\n+ Long Conversation: Phi 4 models, like other models, can in some cases generate responses that are repetitive, unhelpful, or inconsistent in very long chat sessions in both English and non-English languages. Developers are encouraged to place appropriate mitigations, like limiting conversation turns to account for the possible conversational drift.\n \nDevelopers should apply responsible AI best practices, including mapping, measuring, and mitigating risks associated with their specific use case and cultural, linguistic context. Phi 4 family of models are general purpose models. As developers plan to deploy these models for specific use cases, they are encouraged to fine-tune the models for their use case and leverage the models as part of broader AI systems with language-specific safeguards in place. Important areas for consideration include: \n \n+ Allocation: Models may not be suitable for scenarios that could have consequential impact on legal status or the allocation of resources or life opportunities (ex: housing, employment, credit, etc.) without further assessments and additional debiasing techniques.\n+ High-Risk Scenarios: Developers should assess the suitability of using models in high-risk scenarios where unfair, unreliable or offensive outputs might be extremely costly or lead to harm. This includes providing advice in sensitive or expert domains where accuracy and reliability are critical (ex: legal or health advice). Additional safeguards should be implemented at the application level according to the deployment context.\n+ Misinformation: Models may produce inaccurate information. Developers should follow transparency best practices and inform end-users they are interacting with an AI system. 
At the application level, developers can build feedback mechanisms and pipelines to ground responses in use-case specific, contextual information, a technique known as Retrieval Augmented Generation (RAG). \n+ Generation of Harmful Content: Developers should assess outputs for their context and use available safety classifiers or custom solutions appropriate for their use case.\n+ Misuse: Other forms of misuse such as fraud, spam, or malware production may be possible, and developers should ensure that their applications do not violate applicable laws and regulations.\n \n## License\nThe model is licensed under the [MIT license](./LICENSE).\n \n## Trademarks\nThis project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies.\n \n \n## Appendix A: Benchmark Methodology\n \nWe include a brief word on methodology here - and in particular, how we think about optimizing prompts. In an ideal world, we would never change any prompts in our benchmarks to ensure it is always an apples-to-apples comparison when comparing different models. Indeed, this is our default approach, and is the case in the vast majority of models we have run to date. For all benchmarks, we consider using the same generation configuration such as max sequence length (32768), the same temperature for the fair comparison.\nBenchmark datasets\nWe evaluate the model with three of the most popular math benchmarks where the strongest reasoning models are competing together. 
Specifically:\n-\tMath-500: This benchmark consists of 500 challenging math problems designed to test the model's ability to perform complex mathematical reasoning and problem-solving.\n-\tAIME 2024: The American Invitational Mathematics Examination (AIME) is a highly regarded math competition that features a series of difficult problems aimed at assessing advanced mathematical skills and logical reasoning.\n-\tGPQA Diamond: The Graduate-Level Google-Proof Q&A (GPQA) Diamond benchmark focuses on evaluating the model's ability to understand and solve a wide range of mathematical questions, including both straightforward calculations and more intricate problem-solving tasks.", "registry": "Hugging Face", "license": "MIT", "url": "https://huggingface.co/unsloth/Phi-4-mini-reasoning-GGUF/resolve/main/Phi-4-mini-reasoning-Q4_K_M.gguf", "properties": { "jinja": "true" }, "memory": 2480343613, "sha256": "81878401a2f8160473649af89560a7fc0932f3623e4f6e58143d5dcbf71d6480", "backend": "llama-cpp" }, { "id": "hf.microsoft.Phi-4-reasoning-plus", "name": "microsoft/Phi-4-reasoning-plus (Unsloth quantization)", "description": "## Model Summary\n\n| | | \n|-------------------------|-------------------------------------------------------------------------------|\n| **Developers** | Microsoft Research |\n| **Description** | Phi-4-reasoning-plus is a state-of-the-art open-weight reasoning model finetuned from Phi-4 using supervised fine-tuning on a dataset of chain-of-thought traces and reinforcement learning. The supervised fine-tuning dataset includes a blend of synthetic prompts and high-quality filtered data from public domain websites, focused on math, science, and coding skills as well as alignment data for safety and Responsible AI. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. 
Phi-4-reasoning-plus has been trained additionally with Reinforcement Learning, hence, it has higher accuracy but generates on average 50% more tokens, thus having higher latency. |\n| **Architecture** | Base model same as previously released Phi-4, 14B parameters, dense decoder-only Transformer model |\n| **Inputs** | Text, best suited for prompts in the chat format |\n| **Context length** | 32k tokens |\n| **GPUs** | 32 H100-80G |\n| **Training time** | 2.5 days |\n| **Training data** | 16B tokens, ~8.3B unique tokens |\n| **Outputs** | Generated text in response to the input. Model responses have two sections, namely, a reasoning chain-of-thought block followed by a summarization block |\n| **Dates** | January 2025 – April 2025 |\n| **Status** | Static model trained on an offline dataset with cutoff dates of March 2025 and earlier for publicly available data |\n| **Release date** | April 30, 2025 |\n| **License** | MIT |\n\n## Intended Use\n\n| | |\n|-------------------------------|-------------------------------------------------------------------------|\n| **Primary Use Cases** | Our model is designed to accelerate research on language models, for use as a building block for generative AI powered features. It provides uses for general purpose AI systems and applications (primarily in English) which require:

1. Memory/compute constrained environments.
2. Latency bound scenarios.
3. Reasoning and logic. |\n| **Out-of-Scope Use Cases** | This model is designed and tested for math reasoning only. Our models are not specifically designed or evaluated for all downstream purposes. Developers should consider common limitations of language models as they select use cases, and evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high-risk scenarios. Developers should be aware of and adhere to applicable laws or regulations (including privacy, trade compliance laws, etc.) that are relevant to their use case, including the model’s focus on English. Review the Responsible AI Considerations section below for further guidance when choosing a use case. Nothing contained in this Model Card should be interpreted as or deemed a restriction or modification to the license the model is released under. |\n\n## Usage\n\n> \n > To fully take advantage of the model's capabilities, inference must use `temperature=0.8`, `top_k=50`, `top_p=0.95`, and `do_sample=True`. For more complex queries, set `max_new_tokens=32768` to allow for longer chain-of-thought (CoT).\n *Phi-4-reasoning-plus has shown strong performance on reasoning-intensive tasks. In our experiments, we extended its maximum number of tokens to 64k, and it handled longer sequences with promising results, maintaining coherence and logical consistency over extended inputs. This makes it a compelling option to explore for tasks that require deep, multi-step reasoning or extensive context.*\n### Input Formats\nGiven the nature of the training data, **always use** ChatML template with the **following system prompt** for inference:\n```bash\n<|im_start|>system<|im_sep|>\nYou are Phi, a language model trained by Microsoft to help users. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. 
This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: {Thought section} {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:<|im_end|>\n<|im_start|>user<|im_sep|>\nWhat is the derivative of x^2?<|im_end|>\n<|im_start|>assistant<|im_sep|>\n```\n### With `transformers`\n```python\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\ntokenizer = AutoTokenizer.from_pretrained(\\\"microsoft/Phi-4-reasoning-plus\\\")\nmodel = AutoModelForCausalLM.from_pretrained(\\\"microsoft/Phi-4-reasoning-plus\\\", device_map=\\\"auto\\\", torch_dtype=\\\"auto\\\")\nmessages = [\n {\\\"role\\\": \\\"system\\\", \\\"content\\\": \\\"You are Phi, a language model trained by Microsoft to help users. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. 
Please structure your response into two main sections: Thought and Solution using the specified format: {Thought section} {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:\\\"},\n {\\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"What is the derivative of x^2?\\\"},\n]\ninputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors=\\\"pt\\\")\n\noutputs = model.generate(\n inputs.to(model.device),\n max_new_tokens=4096,\n temperature=0.8,\n top_k=50,\n top_p=0.95,\n do_sample=True,\n)\nprint(tokenizer.decode(outputs[0]))\n```\n### With `vllm`\n\n```bash\nvllm serve microsoft/Phi-4-reasoning-plus --enable-reasoning --reasoning-parser deepseek_r1\n```\n\n*Phi-4-reasoning-plus is also supported out-of-the-box by Ollama, llama.cpp, and any Phi-4 compatible framework.*\n\n## Data Overview\n\n### Training Datasets\n\nOur training data is a mixture of Q&A, chat format data in math, science, and coding. The chat prompts are sourced from filtered high-quality web data and optionally rewritten and processed through a synthetic data generation pipeline. 
We further include data to improve truthfulness and safety.\n\n### Benchmark Datasets\n\nWe evaluated Phi-4-reasoning-plus using the open-source [Eureka](https://github.com/microsoft/eureka-ml-insights) evaluation suite and our own internal benchmarks to understand the model's capabilities. More specifically, we evaluate our model on:\n\nReasoning tasks:\n\n* **AIME 2025, 2024, 2023, and 2022:** Math olympiad questions.\n\n* **GPQA-Diamond:** Complex, graduate-level science questions.\n\n* **OmniMath:** Collection of over 4000 olympiad-level math problems with human annotation.\n\n* **LiveCodeBench:** Code generation benchmark gathered from competitive coding contests.\n\n* **3SAT (3-literal Satisfiability Problem) and TSP (Traveling Salesman Problem):** Algorithmic problem solving.\n\n* **BA Calendar:** Planning.\n\n* **Maze and SpatialMap:** Spatial understanding.\n\nGeneral-purpose benchmarks:\n\n* **Kitab:** Information retrieval.\n\n* **IFEval and ArenaHard:** Instruction following.\n\n* **PhiBench:** Internal benchmark.\n\n* **FlenQA:** Impact of prompt length on model performance.\n\n* **HumanEvalPlus:** Functional code generation.\n\n* **MMLU-Pro:** Popular aggregated dataset for multitask language understanding.\n\n## Safety\n\n### Approach\n\nPhi-4-reasoning-plus has adopted a robust safety post-training approach via supervised fine-tuning (SFT). This approach leverages a variety of both open-source and in-house generated synthetic prompts, with LLM-generated responses that adhere to rigorous Microsoft safety guidelines, e.g., User Understanding and Clarity, Security and Ethical Guidelines, Limitations, Disclaimers and Knowledge Scope, Handling Complex and Sensitive Topics, Safety and Respectful Engagement, Confidentiality of Guidelines and Confidentiality of Chain-of-Thoughts. \n\n### Safety Evaluation and Red-Teaming\n\nPrior to release, Phi-4-reasoning-plus followed a multi-faceted evaluation approach. 
Quantitative evaluation was conducted with multiple open-source safety benchmarks and in-house tools utilizing adversarial conversation simulation. For qualitative safety evaluation, we collaborated with the independent AI Red Team (AIRT) at Microsoft to assess safety risks posed by Phi-4-reasoning-plus in both average and adversarial user scenarios. In the average user scenario, AIRT emulated typical single-turn and multi-turn interactions to identify potentially risky behaviors. The adversarial user scenario tested a wide range of techniques aimed at intentionally subverting the model's safety training including grounded-ness, jailbreaks, harmful content like hate and unfairness, violence, sexual content, or self-harm, and copyright violations for protected material. We further evaluate models on Toxigen, a benchmark designed to measure bias and toxicity targeted towards minority groups. \n\nPlease refer to the technical report for more details on safety alignment. \n\n## Model Quality\n\nAt the high-level overview of the model quality on representative benchmarks. 
For the tables below, higher numbers indicate better performance:\n\n| | AIME 24 | AIME 25 | OmniMath | GPQA-D | LiveCodeBench (8/1/24–2/1/25) |\n|-----------------------------|-------------|-------------|-------------|------------|-------------------------------|\n| Phi-4-reasoning | 75.3 | 62.9 | 76.6 | 65.8 | 53.8 |\n| Phi-4-reasoning-plus | 81.3 | 78.0 | 81.9 | 68.9 | 53.1 |\n| OpenThinker2-32B | 58.0 | 58.0 | — | 64.1 | — |\n| QwQ 32B | 79.5 | 65.8 | — | 59.5 | 63.4 |\n| EXAONE-Deep-32B | 72.1 | 65.8 | — | 66.1 | 59.5 |\n| DeepSeek-R1-Distill-70B | 69.3 | 51.5 | 63.4 | 66.2 | 57.5 |\n| DeepSeek-R1 | 78.7 | 70.4 | 85.0 | 73.0 | 62.8 |\n| o1-mini | 63.6 | 54.8 | — | 60.0 | 53.8 |\n| o1 | 74.6 | 75.3 | 67.5 | 76.7 | 71.0 |\n| o3-mini | 88.0 | 78.0 | 74.6 | 77.7 | 69.5 |\n| Claude-3.7-Sonnet | 55.3 | 58.7 | 54.6 | 76.8 | — |\n| Gemini-2.5-Pro | 92.0 | 86.7 | 61.1 | 84.0 | 69.2 |\n\n| | Phi-4 | Phi-4-reasoning | Phi-4-reasoning-plus | o3-mini | GPT-4o |\n|----------------------------------------|-------|------------------|-------------------|---------|--------|\n| FlenQA [3K-token subset] | 82.0 | 97.7 | 97.9 | 96.8 | 90.8 |\n| IFEval Strict | 62.3 | 83.4 | 84.9 | 91.5 | 81.8 |\n| ArenaHard | 68.1 | 73.3 | 79.0 | 81.9 | 75.6 |\n| HumanEvalPlus | 83.5 | 92.9 | 92.3 | 94.0| 88.0 |\n| MMLUPro | 71.5 | 74.3 | 76.0 | 79.4 | 73.0 |\n| Kitab
<br>No Context - Precision<br>With Context - Precision<br>No Context - Recall<br>With Context - Recall | 19.3<br>88.5<br>8.2<br>68.1 | 23.2<br>91.5<br>4.9<br>74.8 | 27.6<br>93.6<br>6.3<br>75.4 | 37.9<br>94.0<br>4.2<br>76.1 | 53.7<br>84.7<br>20.3<br>69.2 |\n| Toxigen Discriminative<br>Toxic category<br>Neutral category | 72.6<br>90.0 | 86.7<br>84.7 | 77.3<br>90.5 | 85.4<br>88.7 | 87.6<br>85.1 |\n| PhiBench 2.21 | 58.2 | 70.6 | 74.2 | 78.0 | 72.4 |\n\nOverall, Phi-4-reasoning and Phi-4-reasoning-plus, with only 14B parameters, perform well across a wide range of reasoning tasks, outperforming significantly larger open-weight models such as DeepSeek-R1 distilled 70B model and approaching the performance levels of full DeepSeek R1 model. We also test the models on multiple new reasoning benchmarks for algorithmic problem solving and planning, including 3SAT, TSP, and BA-Calendar. These new tasks are nominally out-of-domain for the models as the training process did not intentionally target these skills, but the models still show strong generalization to these tasks. Furthermore, when evaluating performance against standard general abilities benchmarks such as instruction following or non-reasoning tasks, we find that our new models improve significantly from Phi-4, despite the post-training being focused on reasoning skills in specific domains. \n\n## Responsible AI Considerations\n\nLike other language models, Phi-4-reasoning-plus can potentially behave in ways that are unfair, unreliable, or offensive. Some of the limiting behaviors to be aware of include: \n\n* **Quality of Service:** The model is trained primarily on English text. Languages other than English will experience worse performance. English language varieties with less representation in the training data might experience worse performance than standard American English. Phi-4-reasoning-plus is not intended to support multilingual use. \n\n* **Representation of Harms & Perpetuation of Stereotypes:** These models can over- or under-represent groups of people, erase representation of some groups, or reinforce demeaning or negative stereotypes. 
Despite safety post-training, these limitations may still be present due to differing levels of representation of different groups or prevalence of examples of negative stereotypes in training data that reflect real-world patterns and societal biases. \n\n* **Inappropriate or Offensive Content:** These models may produce other types of inappropriate or offensive content, which may make it inappropriate to deploy for sensitive contexts without additional mitigations that are specific to the use case. \n\n* **Information Reliability:** Language models can generate nonsensical content or fabricate content that might sound reasonable but is inaccurate or outdated.\n\n* **Election Information Reliability:** The model has an elevated defect rate when responding to election-critical queries, which may result in incorrect or unauthoritative election critical information being presented. We are working to improve the model's performance in this area. Users should verify information related to elections with the election authority in their region. \n\n* **Limited Scope for Code:** Majority of Phi-4-reasoning-plus training data is based in Python and uses common packages such as `typing`, `math`, `random`, `collections`, `datetime`, `itertools`. If the model generates Python scripts that utilize other packages or scripts in other languages, we strongly recommend users manually verify all API uses. \n\nDevelopers should apply responsible AI best practices and are responsible for ensuring that a specific use case complies with relevant laws and regulations (e.g. privacy, trade, etc.). Using safety services like [Azure AI Content Safety](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety) that have advanced guardrails is highly recommended. 
Important areas for consideration include:\n\n* **Allocation:** Models may not be suitable for scenarios that could have consequential impact on legal status or the allocation of resources or life opportunities (ex: housing, employment, credit, etc.) without further assessments and additional debiasing techniques. \n\n* **High-Risk Scenarios:** Developers should assess suitability of using models in high-risk scenarios where unfair, unreliable or offensive outputs might be extremely costly or lead to harm. This includes providing advice in sensitive or expert domains where accuracy and reliability are critical (ex: legal or health advice). Additional safeguards should be implemented at the application level according to the deployment context. \n\n* **Misinformation:** Models may produce inaccurate information. Developers should follow transparency best practices and inform end-users they are interacting with an AI system. At the application level, developers can build feedback mechanisms and pipelines to ground responses in use-case specific, contextual information, a technique known as Retrieval Augmented Generation (RAG). \n\n* **Generation of Harmful Content:** Developers should assess outputs for their context and use available safety classifiers or custom solutions appropriate for their use case. 
\n\n* **Misuse:** Other forms of misuse such as fraud, spam, or malware production may be possible, and developers should ensure that their applications do not violate applicable laws and regulations.\n", "registry": "Hugging Face", "license": "mit", "url": "https://huggingface.co/unsloth/Phi-4-reasoning-plus-GGUF/resolve/main/Phi-4-reasoning-plus-Q4_K_M.gguf", "memory": 9715463520, "properties": { "jinja": "true" }, "sha256": "faf720745e20df40f52ee218be14c72b33070f7aacc508b3fbc61d47f32b4ffe", "backend": "llama-cpp" }, { "id": "hf.google.gemma-3n-E4B", "name": "google/gemma-3n-E4B (Unsloth quantization)", "description": "# Gemma 3n model card\n\n**Model Page**: [Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n)\n\n**Resources and Technical Documentation**:\n\n- [Responsible Generative AI Toolkit](https://ai.google.dev/responsible)\n- [Gemma on Kaggle](https://www.kaggle.com/models/google/gemma-3n)\n- [Gemma on HuggingFace](https://huggingface.co/collections/google/gemma-3n-685065323f5984ef315c93f4)\n- [Gemma on Vertex Model Garden](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemma3n)\n\n**Terms of Use**: [Terms](https://ai.google.dev/gemma/terms)\\\n**Authors**: Google DeepMind\n\n## Model Information\n\nSummary description and brief definition of inputs and outputs.\n\n### Description\n\nGemma is a family of lightweight, state-of-the-art open models from Google,\nbuilt from the same research and technology used to create the Gemini models.\nGemma 3n models are designed for efficient execution on low-resource devices.\nThey are capable of multimodal input, handling text, image, video, and audio\ninput, and generating text outputs, with open weights for pre-trained and\ninstruction-tuned variants. These models were trained with data in over 140\nspoken languages.\n\nGemma 3n models use selective parameter activation technology to reduce resource\nrequirements. 
This technique allows the models to operate at an effective size\nof 2B and 4B parameters, which is lower than the total number of parameters they\ncontain. For more information on Gemma 3n's efficient parameter management\ntechnology, see the\n[Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n#parameters)\npage.\n\n### Inputs and outputs\n\n- **Input:**\n - Text string, such as a question, a prompt, or a document to be\n summarized\n - Images, normalized to 256x256, 512x512, or 768x768 resolution\n and encoded to 256 tokens each\n - Audio data encoded to 6.25 tokens per second from a single channel\n - Total input context of 32K tokens\n- **Output:**\n - Generated text in response to the input, such as an answer to a\n question, analysis of image content, or a summary of a document\n - Total output length up to 32K tokens, subtracting the request\n input tokens\n\n### Usage\n\nBelow, there are some code snippets on how to get quickly started with running\nthe model. First, install the Transformers library. Gemma 3n is supported\nstarting from transformers 4.53.0.\n\n```sh\n$ pip install -U transformers\n```\n\nThen, copy the snippet from the section that is relevant for your use case.\n\n#### Running with the `pipeline` API\n\nYou can initialize the model and processor for inference with `pipeline` as\nfollows.\n\n```python\nfrom transformers import pipeline\nimport torch\n\npipe = pipeline(\n \"image-text-to-text\",\n model=\"google/gemma-3n-e4b-it\",\n device=\"cuda\",\n torch_dtype=torch.bfloat16,\n)\n```\n\nWith instruction-tuned models, you need to use chat templates to process our\ninputs first. 
Then, you can pass it to the pipeline.\n\n```python\nmessages = [\n {\n \"role\": \"system\",\n \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"url\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG\"},\n {\"type\": \"text\", \"text\": \"What animal is on the candy?\"}\n ]\n }\n]\n\noutput = pipe(text=messages, max_new_tokens=200)\nprint(output[0][\"generated_text\"][-1][\"content\"])\n# Okay, let's take a look!\n# Based on the image, the animal on the candy is a **turtle**.\n# You can see the shell shape and the head and legs.\n```\n\n#### Running the model on a single GPU\n\n```python\nfrom transformers import AutoProcessor, Gemma3nForConditionalGeneration\nfrom PIL import Image\nimport requests\nimport torch\n\nmodel_id = \"google/gemma-3n-e4b-it\"\n\nmodel = Gemma3nForConditionalGeneration.from_pretrained(model_id, device_map=\"auto\", torch_dtype=torch.bfloat16,).eval()\n\nprocessor = AutoProcessor.from_pretrained(model_id)\n\nmessages = [\n {\n \"role\": \"system\",\n \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"image\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg\"},\n {\"type\": \"text\", \"text\": \"Describe this image in detail.\"}\n ]\n }\n]\n\ninputs = processor.apply_chat_template(\n messages,\n add_generation_prompt=True,\n tokenize=True,\n return_dict=True,\n return_tensors=\"pt\",\n).to(model.device)\n\ninput_len = inputs[\"input_ids\"].shape[-1]\n\nwith torch.inference_mode():\n generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)\n generation = generation[0][input_len:]\n\ndecoded = processor.decode(generation, skip_special_tokens=True)\nprint(decoded)\n\n# **Overall Impression:** The image is a close-up shot of a vibrant 
garden scene,\n# focusing on a cluster of pink cosmos flowers and a busy bumblebee.\n# It has a slightly soft, natural feel, likely captured in daylight.\n```\n\n### Citation\n\n```\n@article{gemma_3n_2025,\n title={Gemma 3n},\n url={https://ai.google.dev/gemma/docs/gemma-3n},\n publisher={Google DeepMind},\n author={Gemma Team},\n year={2025}\n}\n```\n\n## Model Data\n\nData used for model training and how the data was processed.\n\n### Training Dataset\n\nThese models were trained on a dataset that includes a wide variety of sources\ntotalling approximately 11 trillion tokens. The knowledge cutoff date for the\ntraining data was June 2024. Here are the key components:\n\n- **Web Documents**: A diverse collection of web text ensures the model\n is exposed to a broad range of linguistic styles, topics, and vocabulary.\n The training dataset includes content in over 140 languages.\n- **Code**: Exposing the model to code helps it to learn the syntax and\n patterns of programming languages, which improves its ability to generate\n code and understand code-related questions.\n- **Mathematics**: Training on mathematical text helps the model learn\n logical reasoning, symbolic representation, and to address mathematical queries.\n- **Images**: A wide range of images enables the model to perform image\n analysis and visual data extraction tasks.\n- Audio: A diverse set of sound samples enables the model to recognize\n speech, transcribe text from recordings, and identify information in audio data.\n\nThe combination of these diverse data sources is crucial for training a\npowerful multimodal model that can handle a wide variety of different tasks and\ndata formats.\n\n### Data Preprocessing\n\nHere are the key data cleaning and filtering methods applied to the training\ndata:\n\n- **CSAM Filtering**: Rigorous CSAM (Child Sexual Abuse Material)\n filtering was applied at multiple stages in the data preparation process to\n ensure the exclusion of harmful and illegal 
content.\n- **Sensitive Data Filtering**: As part of making Gemma pre-trained models\n safe and reliable, automated techniques were used to filter out certain\n personal information and other sensitive data from training sets.\n- **Additional methods**: Filtering based on content quality and safety in\n line with\n [our policies](https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf).\n\n## Implementation Information\n\nDetails about the model internals.\n\n### Hardware\n\nGemma was trained using [Tensor Processing Unit\n(TPU)](https://cloud.google.com/tpu/docs/intro-to-tpu) hardware (TPUv4p, TPUv5p\nand TPUv5e). Training generative models requires significant computational\npower. TPUs, designed specifically for matrix operations common in machine\nlearning, offer several advantages in this domain:\n\n- **Performance**: TPUs are specifically designed to handle the massive\n computations involved in training generative models. They can speed up\n training considerably compared to CPUs.\n- **Memory**: TPUs often come with large amounts of high-bandwidth memory,\n allowing for the handling of large models and batch sizes during training.\n This can lead to better model quality.\n- **Scalability**: TPU Pods (large clusters of TPUs) provide a scalable\n solution for handling the growing complexity of large foundation models.\n You can distribute training across multiple TPU devices for faster and more\n efficient processing.\n- **Cost-effectiveness**: In many scenarios, TPUs can provide a more\n cost-effective solution for training large models compared to CPU-based\n infrastructure, especially when considering the time and resources saved\n due to faster training.\n\nThese advantages are aligned with\n[Google's commitments to operate sustainably](https://sustainability.google/operating-sustainably/).\n\n### Software\n\nTraining was done using [JAX](https://github.com/jax-ml/jax) and\n[ML 
Pathways](https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/).\nJAX allows researchers to take advantage of the latest generation of hardware,\nincluding TPUs, for faster and more efficient training of large models. ML\nPathways is Google's latest effort to build artificially intelligent systems\ncapable of generalizing across multiple tasks. This is specially suitable for\nfoundation models, including large language models like these ones.\n\nTogether, JAX and ML Pathways are used as described in the\n[paper about the Gemini family of models](https://goo.gle/gemma2report):\n*\"the 'single controller' programming model of Jax and Pathways allows a single\nPython process to orchestrate the entire training run, dramatically simplifying\nthe development workflow.\"*\n\n## Evaluation\n\nModel evaluation metrics and results.\n\n### Benchmark Results\n\nThese models were evaluated at full precision (float32) against a large\ncollection of different datasets and metrics to cover different aspects of\ncontent generation. Evaluation results marked with **IT** are for\ninstruction-tuned models. 
Evaluation results marked with **PT** are for\npre-trained models.\n\n#### Reasoning and factuality\n\n| Benchmark | Metric | n-shot | E2B PT | E4B PT |\n| ------------------------------ |----------------|----------|:--------:|:--------:|\n| [HellaSwag][hellaswag] | Accuracy | 10-shot | 72.2 | 78.6 |\n| [BoolQ][boolq] | Accuracy | 0-shot | 76.4 | 81.6 |\n| [PIQA][piqa] | Accuracy | 0-shot | 78.9 | 81.0 |\n| [SocialIQA][socialiqa] | Accuracy | 0-shot | 48.8 | 50.0 |\n| [TriviaQA][triviaqa] | Accuracy | 5-shot | 60.8 | 70.2 |\n| [Natural Questions][naturalq] | Accuracy | 5-shot | 15.5 | 20.9 |\n| [ARC-c][arc] | Accuracy | 25-shot | 51.7 | 61.6 |\n| [ARC-e][arc] | Accuracy | 0-shot | 75.8 | 81.6 |\n| [WinoGrande][winogrande] | Accuracy | 5-shot | 66.8 | 71.7 |\n| [BIG-Bench Hard][bbh] | Accuracy | few-shot | 44.3 | 52.9 |\n| [DROP][drop] | Token F1 score | 1-shot | 53.9 | 60.8 |\n\n[hellaswag]: https://arxiv.org/abs/1905.07830\n[boolq]: https://arxiv.org/abs/1905.10044\n[piqa]: https://arxiv.org/abs/1911.11641\n[socialiqa]: https://arxiv.org/abs/1904.09728\n[triviaqa]: https://arxiv.org/abs/1705.03551\n[naturalq]: https://github.com/google-research-datasets/natural-questions\n[arc]: https://arxiv.org/abs/1911.01547\n[winogrande]: https://arxiv.org/abs/1907.10641\n[bbh]: https://paperswithcode.com/dataset/bbh\n[drop]: https://arxiv.org/abs/1903.00161\n\n#### Multilingual\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------|-------------------------|----------|:--------:|:--------:|\n| [MGSM][mgsm] | Accuracy | 0-shot | 53.1 | 60.7 |\n| [WMT24++][wmt24pp] (ChrF) | Character-level F-score | 0-shot | 42.7 | 50.1 |\n| [Include][include] | Accuracy | 0-shot | 38.6 | 57.2 |\n| [MMLU][mmlu] (ProX) | Accuracy | 0-shot | 8.1 | 19.9 |\n| [OpenAI MMLU][openai-mmlu] | Accuracy | 0-shot | 22.3 | 35.6 |\n| [Global-MMLU][global-mmlu] | Accuracy | 0-shot | 55.1 | 60.3 |\n| [ECLeKTic][eclektic] | ECLeKTic score | 0-shot | 2.5 | 1.9 |\n\n[mgsm]: 
https://arxiv.org/abs/2210.03057\n[wmt24pp]: https://arxiv.org/abs/2502.12404v1\n[include]:https://arxiv.org/abs/2411.19799\n[mmlu]: https://arxiv.org/abs/2009.03300\n[openai-mmlu]: https://huggingface.co/datasets/openai/MMMLU\n[global-mmlu]: https://huggingface.co/datasets/CohereLabs/Global-MMLU\n[eclektic]: https://arxiv.org/abs/2502.21228\n\n#### STEM and code\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------|--------------------------|----------|:--------:|:--------:|\n| [GPQA][gpqa] Diamond | RelaxedAccuracy/accuracy | 0-shot | 24.8 | 23.7 |\n| [LiveCodeBench][lcb] v5 | pass@1 | 0-shot | 18.6 | 25.7 |\n| Codegolf v2.2 | pass@1 | 0-shot | 11.0 | 16.8 |\n| [AIME 2025][aime-2025] | Accuracy | 0-shot | 6.7 | 11.6 |\n\n[gpqa]: https://arxiv.org/abs/2311.12022\n[lcb]: https://arxiv.org/abs/2403.07974\n[aime-2025]: https://www.vals.ai/benchmarks/aime-2025-05-09\n\n#### Additional benchmarks\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------ |------------|----------|:--------:|:--------:|\n| [MMLU][mmlu] | Accuracy | 0-shot | 60.1 | 64.9 |\n| [MBPP][mbpp] | pass@1 | 3-shot | 56.6 | 63.6 |\n| [HumanEval][humaneval] | pass@1 | 0-shot | 66.5 | 75.0 |\n| [LiveCodeBench][lcb] | pass@1 | 0-shot | 13.2 | 13.2 |\n| HiddenMath | Accuracy | 0-shot | 27.7 | 37.7 |\n| [Global-MMLU-Lite][global-mmlu-lite] | Accuracy | 0-shot | 59.0 | 64.5 |\n| [MMLU][mmlu] (Pro) | Accuracy | 0-shot | 40.5 | 50.6 |\n\n[gpqa]: https://arxiv.org/abs/2311.12022\n[mbpp]: https://arxiv.org/abs/2108.07732\n[humaneval]: https://arxiv.org/abs/2107.03374\n[lcb]: https://arxiv.org/abs/2403.07974\n[global-mmlu-lite]: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite\n\n## Ethics and Safety\n\nEthics and safety evaluation approach and results.\n\n### Evaluation Approach\n\nOur evaluation methods include structured evaluations and internal red-teaming\ntesting of relevant content policies. 
Red-teaming was conducted by a number of\ndifferent teams, each with different goals and human evaluation metrics. These\nmodels were evaluated against a number of different categories relevant to\nethics and safety, including:\n\n- **Child Safety**: Evaluation of text-to-text and image to text prompts\n covering child safety policies, including child sexual abuse and\n exploitation.\n- **Content Safety:** Evaluation of text-to-text and image to text prompts\n covering safety policies including, harassment, violence and gore, and hate\n speech.\n- **Representational Harms**: Evaluation of text-to-text and image to text\n prompts covering safety policies including bias, stereotyping, and harmful\n associations or inaccuracies.\n\nIn addition to development level evaluations, we conduct \"assurance\nevaluations\" which are our 'arms-length' internal evaluations for responsibility\ngovernance decision making. They are conducted separately from the model\ndevelopment team, to inform decision making about release. High level findings\nare fed back to the model team, but prompt sets are held-out to prevent\noverfitting and preserve the results' ability to inform decision making. Notable\nassurance evaluation results are reported to our Responsibility & Safety Council\nas part of release review.\n\n### Evaluation Results\n\nFor all areas of safety testing, we saw safe levels of performance across the\ncategories of child safety, content safety, and representational harms relative\nto previous Gemma models. All testing was conducted without safety filters to\nevaluate the model capabilities and behaviors. For text-to-text, image-to-text,\nand audio-to-text, and across all model sizes, the model produced minimal policy\nviolations, and showed significant improvements over previous Gemma models'\nperformance with respect to high severity violations. 
A limitation of our\nevaluations was they included primarily English language prompts.\n\n## Usage and Limitations\n\nThese models have certain limitations that users should be aware of.\n\n### Intended Usage\n\nOpen generative models have a wide range of applications across various\nindustries and domains. The following list of potential uses is not\ncomprehensive. The purpose of this list is to provide contextual information\nabout the possible use-cases that the model creators considered as part of model\ntraining and development.\n\n- Content Creation and Communication\n - **Text Generation**: Generate creative text formats such as\n poems, scripts, code, marketing copy, and email drafts.\n - **Chatbots and Conversational AI**: Power conversational\n interfaces for customer service, virtual assistants, or interactive\n applications.\n - **Text Summarization**: Generate concise summaries of a text\n corpus, research papers, or reports.\n - **Image Data Extraction**: Extract, interpret, and summarize\n visual data for text communications.\n - **Audio Data Extraction**: Transcribe spoken language, translate speech\n to text in other languages, and analyze sound-based data.\n- Research and Education\n - **Natural Language Processing (NLP) and generative model\n Research**: These models can serve as a foundation for researchers to\n experiment with generative models and NLP techniques, develop\n algorithms, and contribute to the advancement of the field.\n - **Language Learning Tools**: Support interactive language\n learning experiences, aiding in grammar correction or providing writing\n practice.\n - **Knowledge Exploration**: Assist researchers in exploring large\n bodies of data by generating summaries or answering questions about\n specific topics.\n\n### Limitations\n\n- Training Data\n - The quality and diversity of the training data significantly\n influence the model's capabilities. 
Biases or gaps in the training data\n can lead to limitations in the model's responses.\n - The scope of the training dataset determines the subject areas\n the model can handle effectively.\n- Context and Task Complexity\n - Models are better at tasks that can be framed with clear\n prompts and instructions. Open-ended or highly complex tasks might be\n challenging.\n - A model's performance can be influenced by the amount of context\n provided (longer context generally leads to better outputs, up to a\n certain point).\n- Language Ambiguity and Nuance\n - Natural language is inherently complex. Models might struggle\n to grasp subtle nuances, sarcasm, or figurative language.\n- Factual Accuracy\n - Models generate responses based on information they learned\n from their training datasets, but they are not knowledge bases. They\n may generate incorrect or outdated factual statements.\n- Common Sense\n - Models rely on statistical patterns in language. They might\n lack the ability to apply common sense reasoning in certain situations.\n\n### Ethical Considerations and Risks\n\nThe development of generative models raises several ethical concerns. 
In\ncreating an open model, we have carefully considered the following:\n\n- Bias and Fairness\n - Generative models trained on large-scale, real-world text and image data\n can reflect socio-cultural biases embedded in the training material.\n These models underwent careful scrutiny, input data pre-processing\n described and posterior evaluations reported in this card.\n- Misinformation and Misuse\n - Generative models can be misused to generate text that is\n false, misleading, or harmful.\n - Guidelines are provided for responsible use with the model, see the\n [Responsible Generative AI Toolkit](https://ai.google.dev/responsible).\n- Transparency and Accountability:\n - This model card summarizes details on the models' architecture,\n capabilities, limitations, and evaluation processes.\n - A responsibly developed open model offers the opportunity to\n share innovation by making generative model technology accessible to\n developers and researchers across the AI ecosystem.\n\nRisks identified and mitigations:\n\n- **Perpetuation of biases**: It's encouraged to perform continuous monitoring\n (using evaluation metrics, human review) and the exploration of de-biasing\n techniques during model training, fine-tuning, and other use cases.\n- **Generation of harmful content**: Mechanisms and guidelines for content\n safety are essential. Developers are encouraged to exercise caution and\n implement appropriate content safety safeguards based on their specific\n product policies and application use cases.\n- **Misuse for malicious purposes**: Technical limitations and developer\n and end-user education can help mitigate against malicious applications of\n generative models. Educational resources and reporting mechanisms for users\n to flag misuse are provided. 
Prohibited uses of Gemma models are outlined\n in the\n [Gemma Prohibited Use Policy](https://ai.google.dev/gemma/prohibited_use_policy).\n- **Privacy violations**: Models were trained on data filtered for removal of\n certain personal information and other sensitive data. Developers are\n encouraged to adhere to privacy regulations with privacy-preserving\n techniques.\n\n### Benefits\n\nAt the time of release, this family of models provides high-performance open\ngenerative model implementations designed from the ground up for responsible AI\ndevelopment compared to similarly sized models.\n\nUsing the benchmark evaluation metrics described in this document, these models\nhave shown to provide superior performance to other, comparably-sized open model\nalternatives.", "registry": "Hugging Face", "license": "gemma", "url": "https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF/resolve/main/gemma-3n-E4B-it-Q4_K_M.gguf", "memory": 4425974, "properties": { "jinja": "true" }, "sha256": "43b489bb77a81bda85180e7c490d40ad7f1d5c2ce654c9b05e15e104bd3c777e", "backend": "llama-cpp" }, { "id": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov", "name": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov", "description": "# Mistral-7B-Instruct-v0.2-int4-ov\n* Model creator: [Mistral AI](https://huggingface.co/mistralai)\n * Original model: [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)\n\n## Description\n\nThis is [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) model converted to the [OpenVINO™ IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) (Intermediate Representation) format.\n\n## Compatibility\n\nThe provided OpenVINO™ IR model is compatible with:\n\n* OpenVINO version 2024.2.0 and higher\n* Optimum Intel 1.19.0 and higher\n\n## Running Model Inference with [Optimum Intel](https://huggingface.co/docs/optimum/intel/index)\n\n\n1. 
Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:\n\n```\npip install optimum[openvino]\n```\n\n2. Run model inference:\n\n```\nfrom transformers import AutoTokenizer\nfrom optimum.intel.openvino import OVModelForCausalLM\n\nmodel_id = \"OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov\"\ntokenizer = AutoTokenizer.from_pretrained(model_id)\nmodel = OVModelForCausalLM.from_pretrained(model_id)\n\ninputs = tokenizer(\"What is OpenVINO?\", return_tensors=\"pt\")\n\noutputs = model.generate(**inputs, max_length=200)\ntext = tokenizer.batch_decode(outputs)[0]\nprint(text)\n```\n\nFor more examples and possible optimizations, refer to the [OpenVINO Large Language Model Inference Guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n\n## Running Model Inference with [OpenVINO GenAI](https://github.com/openvinotoolkit/openvino.genai)\n\n1. Install packages required for using OpenVINO GenAI.\n```\npip install openvino-genai huggingface_hub\n```\n\n2. Download model from HuggingFace Hub\n \n```\nimport huggingface_hub as hf_hub\n\nmodel_id = \"OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov\"\nmodel_path = \"Mistral-7B-Instruct-v0.2-int4-ov\"\n\nhf_hub.snapshot_download(model_id, local_dir=model_path)\n\n```\n\n3. 
Run model inference:\n\n```\nimport openvino_genai as ov_genai\n\ndevice = \"CPU\"\npipe = ov_genai.LLMPipeline(model_path, device)\nprint(pipe.generate(\"What is OpenVINO?\", max_length=200))\n```\n\nMore GenAI usage examples can be found in OpenVINO GenAI library [docs](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md) and [samples](https://github.com/openvinotoolkit/openvino.genai?tab=readme-ov-file#openvino-genai-samples)\n\n## Limitations\n\nCheck the original model card for [limitations](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2#limitations).\n\n## Legal information\n\nThe original model is distributed under [apache-2.0](https://choosealicense.com/licenses/apache-2.0/) license. More details can be found in [original model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).\n\n## Disclaimer\n\nIntel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.", "registry": "Hugging Face", "license": "Apache-2.0", "url": "huggingface:/OpenVINO/mistral-7B-instruct-v0.2-int4-ov", "backend": "openvino" } ], "categories": [ { "id": "natural-language-processing", "name": "Natural Language Processing", "description": "Models that work with text: classify, summarize, translate, or generate text." 
}, { "id": "computer-vision", "description": "Process images, from classification to object detection and segmentation.", "name": "Computer Vision" }, { "id": "audio", "description": "Recognize speech or classify audio with audio models.", "name": "Audio" }, { "id": "multimodal", "description": "Models that can process and combine multiple types of input, such as text, images, and audio.", "name": "Multimodal" } ] } ================================================ FILE: packages/backend/src/assets/inference-images.json ================================================ { "whispercpp": { "default": "quay.io/ramalama/ramalama-whisper-server@sha256:2ce4e2751672e3baf76d6f220100160da86ff5a98001b76392aeae9da2d90b18" }, "llamacpp": { "default": "quay.io/ramalama/ramalama-llama-server@sha256:293f66f2dfea8e21393dc03e898616b2a71f0a72a0f3bc5f936439130ada2648", "cuda": "quay.io/ramalama/cuda-llama-server@sha256:b9ced640539c72edee2f946b69618a6d30b68700ac9342d1b9483831988d40ef", "intel": "quay.io/ramalama/intel-gpu-llama-server@sha256:ea2aa37c0a4af544de80da9d8aa53a0641c91ccfdca3a329a251685a96210551" }, "openvino": { "default": "quay.io/ramalama/openvino@sha256:e026ecbdf6ae222a193badad5b0dd2253362e366e22c8b402f5a492803b10fd5" } } ================================================ FILE: packages/backend/src/assets/instructlab-images.json ================================================ { "default": "docker.io/redhat/instructlab@sha256:c6b2ecb4547b1f43b5539ee99bdbf5c9ae40599fabe1c740622295d9721b91c4" } ================================================ FILE: packages/backend/src/assets/llama-stack-images.json ================================================ { "default": "ghcr.io/containers/podman-ai-lab-stack:a06f399ebf7cb2645af126da0e84395db9bb0d1a" } ================================================ FILE: packages/backend/src/assets/llama-stack-playground-images.json ================================================ { "default": 
"quay.io/podman-ai-lab/llama-stack-playground@sha256:2ee73137c0b2b401c2703b5881dd84c07f0baa385408e7c02f076a2804c689c2" } ================================================ FILE: packages/backend/src/assets/openai.json ================================================ { "openapi": "3.1.0", "info": { "title": "OpenAI API", "version": "0.3.2" }, "servers": [ { "url": "", "description": "description" } ], "paths": { "/v1/completions": { "post": { "tags": ["OpenAI V1"], "summary": "Completion", "operationId": "create_completion_v1_completions_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateCompletionRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "anyOf": [ { "$ref": "#/components/schemas/CreateCompletionResponse" }, { "type": "string" }, { "$ref": "#/components/schemas/CreateCompletionResponse" } ], "title": "Completion response, when stream=False" } }, "text/event-stream": { "schema": { "type": "string", "title": "Server Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", "example": "data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... 
data: [DONE]" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/v1/embeddings": { "post": { "tags": ["OpenAI V1"], "summary": "Embedding", "operationId": "create_embedding_v1_embeddings_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateEmbeddingRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/v1/chat/completions": { "post": { "tags": ["OpenAI V1"], "summary": "Chat", "operationId": "create_chat_completion_v1_chat_completions_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateChatCompletionRequest" }, "examples": { "normal": { "summary": "Chat Completion", "value": { "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "What is the capital of France?" } ] } }, "json_mode": { "summary": "JSON Mode", "value": { "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Who won the world series in 2020" } ], "response_format": { "type": "json_object" } } }, "tool_calling": { "summary": "Tool Calling", "value": { "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Extract Jason is 30 years old." 
} ], "tools": [ { "type": "function", "function": { "name": "User", "description": "User record", "parameters": { "type": "object", "properties": { "name": { "type": "string" }, "age": { "type": "number" } }, "required": ["name", "age"] } } } ], "tool_choice": { "type": "function", "function": { "name": "User" } } } }, "logprobs": { "summary": "Logprobs", "value": { "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "What is the capital of France?" } ], "logprobs": true, "top_logprobs": 10 } } } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "anyOf": [ { "$ref": "#/components/schemas/CreateChatCompletionResponse" }, { "type": "string" }, { "$ref": "#/components/schemas/CreateChatCompletionResponse" } ], "title": "Completion response, when stream=False" } }, "text/event-stream": { "schema": { "type": "string", "title": "Server Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", "example": "data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... 
data: [DONE]" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/v1/models": { "get": { "tags": ["OpenAI V1"], "summary": "Models", "operationId": "get_models_v1_models_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ModelList" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/extras/tokenize": { "post": { "tags": ["Extras"], "summary": "Tokenize", "operationId": "tokenize_extras_tokenize_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeInputRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeInputResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/extras/tokenize/count": { "post": { "tags": ["Extras"], "summary": "Tokenize Count", "operationId": "count_query_tokens_extras_tokenize_count_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeInputRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TokenizeInputCountResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } }, "/extras/detokenize": { "post": { "tags": ["Extras"], "summary": "Detokenize", "operationId": "detokenize_extras_detokenize_post", "requestBody": { "content": { 
"application/json": { "schema": { "$ref": "#/components/schemas/DetokenizeInputRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DetokenizeInputResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "security": [ { "HTTPBearer": [] } ] } } }, "components": { "schemas": { "ChatCompletionFunction": { "properties": { "name": { "type": "string", "title": "Name" }, "description": { "type": "string", "title": "Description" }, "parameters": { "additionalProperties": { "anyOf": [ { "type": "integer" }, { "type": "string" }, { "type": "boolean" }, { "items": {}, "type": "array" }, { "type": "object" }, { "type": "null" } ] }, "type": "object", "title": "Parameters" } }, "type": "object", "required": ["name", "parameters"], "title": "ChatCompletionFunction" }, "ChatCompletionMessageToolCall": { "properties": { "id": { "type": "string", "title": "Id" }, "type": { "type": "string", "const": "function", "title": "Type" }, "function": { "$ref": "#/components/schemas/ChatCompletionMessageToolCallFunction" } }, "type": "object", "required": ["id", "type", "function"], "title": "ChatCompletionMessageToolCall" }, "ChatCompletionMessageToolCallFunction": { "properties": { "name": { "type": "string", "title": "Name" }, "arguments": { "type": "string", "title": "Arguments" } }, "type": "object", "required": ["name", "arguments"], "title": "ChatCompletionMessageToolCallFunction" }, "ChatCompletionNamedToolChoice": { "properties": { "type": { "type": "string", "const": "function", "title": "Type" }, "function": { "$ref": "#/components/schemas/ChatCompletionNamedToolChoiceFunction" } }, "type": "object", "required": ["type", "function"], "title": "ChatCompletionNamedToolChoice" }, "ChatCompletionNamedToolChoiceFunction": { "properties": { "name": { 
"type": "string", "title": "Name" } }, "type": "object", "required": ["name"], "title": "ChatCompletionNamedToolChoiceFunction" }, "ChatCompletionRequestAssistantMessage": { "properties": { "role": { "type": "string", "const": "assistant", "title": "Role" }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Content" }, "tool_calls": { "items": { "$ref": "#/components/schemas/ChatCompletionMessageToolCall" }, "type": "array", "title": "Tool Calls" }, "function_call": { "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageFunctionCall" } }, "type": "object", "required": ["role", "content"], "title": "ChatCompletionRequestAssistantMessage" }, "ChatCompletionRequestAssistantMessageFunctionCall": { "properties": { "name": { "type": "string", "title": "Name" }, "arguments": { "type": "string", "title": "Arguments" } }, "type": "object", "required": ["name", "arguments"], "title": "ChatCompletionRequestAssistantMessageFunctionCall" }, "ChatCompletionRequestFunctionCallOption": { "properties": { "name": { "type": "string", "title": "Name" } }, "type": "object", "required": ["name"], "title": "ChatCompletionRequestFunctionCallOption" }, "ChatCompletionRequestFunctionMessage": { "properties": { "role": { "type": "string", "const": "function", "title": "Role" }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Content" }, "name": { "type": "string", "title": "Name" } }, "type": "object", "required": ["role", "content", "name"], "title": "ChatCompletionRequestFunctionMessage" }, "ChatCompletionRequestMessageContentPartImage": { "properties": { "type": { "type": "string", "const": "image_url", "title": "Type" }, "image_url": { "anyOf": [ { "type": "string" }, { "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImageImageUrl" } ], "title": "Image Url" } }, "type": "object", "required": ["type", "image_url"], "title": "ChatCompletionRequestMessageContentPartImage" }, 
"ChatCompletionRequestMessageContentPartImageImageUrl": { "properties": { "url": { "type": "string", "title": "Url" }, "detail": { "type": "string", "enum": ["auto", "low", "high"], "title": "Detail" } }, "type": "object", "required": ["url"], "title": "ChatCompletionRequestMessageContentPartImageImageUrl" }, "ChatCompletionRequestMessageContentPartText": { "properties": { "type": { "type": "string", "const": "text", "title": "Type" }, "text": { "type": "string", "title": "Text" } }, "type": "object", "required": ["type", "text"], "title": "ChatCompletionRequestMessageContentPartText" }, "ChatCompletionRequestResponseFormat": { "properties": { "type": { "type": "string", "enum": ["text", "json_object"], "title": "Type" }, "schema": { "anyOf": [ { "type": "integer" }, { "type": "string" }, { "type": "boolean" }, { "items": {}, "type": "array" }, { "type": "object" }, { "type": "null" } ], "title": "Schema" } }, "type": "object", "required": ["type"], "title": "ChatCompletionRequestResponseFormat" }, "ChatCompletionRequestSystemMessage": { "properties": { "role": { "type": "string", "const": "system", "title": "Role" }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Content" } }, "type": "object", "required": ["role", "content"], "title": "ChatCompletionRequestSystemMessage" }, "ChatCompletionRequestToolMessage": { "properties": { "role": { "type": "string", "const": "tool", "title": "Role" }, "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Content" }, "tool_call_id": { "type": "string", "title": "Tool Call Id" } }, "type": "object", "required": ["role", "content", "tool_call_id"], "title": "ChatCompletionRequestToolMessage" }, "ChatCompletionRequestUserMessage": { "properties": { "role": { "type": "string", "const": "user", "title": "Role" }, "content": { "anyOf": [ { "type": "string" }, { "items": { "anyOf": [ { "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText" }, { "$ref": 
"#/components/schemas/ChatCompletionRequestMessageContentPartImage" } ] }, "type": "array" }, { "type": "null" } ], "title": "Content" } }, "type": "object", "required": ["role", "content"], "title": "ChatCompletionRequestUserMessage" }, "ChatCompletionResponseChoice": { "properties": { "index": { "type": "integer", "title": "Index" }, "message": { "$ref": "#/components/schemas/ChatCompletionResponseMessage" }, "logprobs": { "anyOf": [ { "$ref": "#/components/schemas/CompletionLogprobs" }, { "type": "null" } ] }, "finish_reason": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Finish Reason" } }, "type": "object", "required": ["index", "message", "logprobs", "finish_reason"], "title": "ChatCompletionResponseChoice" }, "ChatCompletionResponseFunctionCall": { "properties": { "name": { "type": "string", "title": "Name" }, "arguments": { "type": "string", "title": "Arguments" } }, "type": "object", "required": ["name", "arguments"], "title": "ChatCompletionResponseFunctionCall" }, "ChatCompletionResponseMessage": { "properties": { "content": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Content" }, "tool_calls": { "items": { "$ref": "#/components/schemas/ChatCompletionMessageToolCall" }, "type": "array", "title": "Tool Calls" }, "role": { "type": "string", "enum": ["assistant", "function"], "title": "Role" }, "function_call": { "$ref": "#/components/schemas/ChatCompletionResponseFunctionCall" } }, "type": "object", "required": ["content", "role"], "title": "ChatCompletionResponseMessage" }, "ChatCompletionTool": { "properties": { "type": { "type": "string", "const": "function", "title": "Type" }, "function": { "$ref": "#/components/schemas/ChatCompletionToolFunction" } }, "type": "object", "required": ["type", "function"], "title": "ChatCompletionTool" }, "ChatCompletionToolFunction": { "properties": { "name": { "type": "string", "title": "Name" }, "description": { "type": "string", "title": "Description" }, "parameters": { 
"additionalProperties": { "anyOf": [ { "type": "integer" }, { "type": "string" }, { "type": "boolean" }, { "items": {}, "type": "array" }, { "type": "object" }, { "type": "null" } ] }, "type": "object", "title": "Parameters" } }, "type": "object", "required": ["name", "parameters"], "title": "ChatCompletionToolFunction" }, "CompletionChoice": { "properties": { "text": { "type": "string", "title": "Text" }, "index": { "type": "integer", "title": "Index" }, "logprobs": { "anyOf": [ { "$ref": "#/components/schemas/CompletionLogprobs" }, { "type": "null" } ] }, "finish_reason": { "anyOf": [ { "type": "string", "enum": ["stop", "length"] }, { "type": "null" } ], "title": "Finish Reason" } }, "type": "object", "required": ["text", "index", "logprobs", "finish_reason"], "title": "CompletionChoice" }, "CompletionLogprobs": { "properties": { "text_offset": { "items": { "type": "integer" }, "type": "array", "title": "Text Offset" }, "token_logprobs": { "items": { "anyOf": [ { "type": "number" }, { "type": "null" } ] }, "type": "array", "title": "Token Logprobs" }, "tokens": { "items": { "type": "string" }, "type": "array", "title": "Tokens" }, "top_logprobs": { "items": { "anyOf": [ { "additionalProperties": { "type": "number" }, "type": "object" }, { "type": "null" } ] }, "type": "array", "title": "Top Logprobs" } }, "type": "object", "required": ["text_offset", "token_logprobs", "tokens", "top_logprobs"], "title": "CompletionLogprobs" }, "CompletionUsage": { "properties": { "prompt_tokens": { "type": "integer", "title": "Prompt Tokens" }, "completion_tokens": { "type": "integer", "title": "Completion Tokens" }, "total_tokens": { "type": "integer", "title": "Total Tokens" } }, "type": "object", "required": ["prompt_tokens", "completion_tokens", "total_tokens"], "title": "CompletionUsage" }, "CreateChatCompletionRequest": { "properties": { "messages": { "items": { "anyOf": [ { "$ref": "#/components/schemas/ChatCompletionRequestSystemMessage" }, { "$ref": 
"#/components/schemas/ChatCompletionRequestUserMessage" }, { "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessage" }, { "$ref": "#/components/schemas/ChatCompletionRequestToolMessage" }, { "$ref": "#/components/schemas/ChatCompletionRequestFunctionMessage" } ] }, "type": "array", "title": "Messages", "description": "A list of messages to generate completions for.", "default": [] }, "functions": { "anyOf": [ { "items": { "$ref": "#/components/schemas/ChatCompletionFunction" }, "type": "array" }, { "type": "null" } ], "title": "Functions", "description": "A list of functions to apply to the generated completions." }, "function_call": { "anyOf": [ { "type": "string", "enum": ["none", "auto"] }, { "$ref": "#/components/schemas/ChatCompletionRequestFunctionCallOption" }, { "type": "null" } ], "title": "Function Call", "description": "A function to apply to the generated completions." }, "tools": { "anyOf": [ { "items": { "$ref": "#/components/schemas/ChatCompletionTool" }, "type": "array" }, { "type": "null" } ], "title": "Tools", "description": "A list of tools to apply to the generated completions." }, "tool_choice": { "anyOf": [ { "type": "string", "enum": ["none", "auto", "required"] }, { "$ref": "#/components/schemas/ChatCompletionNamedToolChoice" }, { "type": "null" } ], "title": "Tool Choice", "description": "A tool to apply to the generated completions." }, "max_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Max Tokens", "description": "The maximum number of tokens to generate. Defaults to inf" }, "min_tokens": { "type": "integer", "minimum": 0.0, "title": "Min Tokens", "description": "The minimum number of tokens to generate. It may return fewer tokens if another condition is met (e.g. max_tokens, stop).", "default": 0 }, "logprobs": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "title": "Logprobs", "description": "Whether to output the logprobs or not. 
Default is True", "default": false }, "top_logprobs": { "anyOf": [ { "type": "integer", "minimum": 0.0 }, { "type": "null" } ], "title": "Top Logprobs", "description": "The number of logprobs to generate. If None, no logprobs are generated. logprobs need to set to True." }, "temperature": { "type": "number", "title": "Temperature", "description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.", "default": 0.8 }, "top_p": { "type": "number", "maximum": 1.0, "minimum": 0.0, "title": "Top P", "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.", "default": 0.95 }, "min_p": { "type": "number", "maximum": 1.0, "minimum": 0.0, "title": "Min P", "description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. 
The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out.", "default": 0.05 }, "stop": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], "title": "Stop", "description": "A list of tokens at which to stop generation. If None, no stop tokens are used." }, "stream": { "type": "boolean", "title": "Stream", "description": "Whether to stream the results as they are generated. Useful for chatbots.", "default": false }, "stream_options": { "anyOf": [ { "$ref": "#/components/schemas/StreamOptions" }, { "type": "null" } ], "description": "Options for streaming response. Only set this when you set stream: true." }, "presence_penalty": { "anyOf": [ { "type": "number", "maximum": 2.0, "minimum": -2.0 }, { "type": "null" } ], "title": "Presence Penalty", "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", "default": 0.0 }, "frequency_penalty": { "anyOf": [ { "type": "number", "maximum": 2.0, "minimum": -2.0 }, { "type": "null" } ], "title": "Frequency Penalty", "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", "default": 0.0 }, "logit_bias": { "anyOf": [ { "additionalProperties": { "type": "number" }, "type": "object" }, { "type": "null" } ], "title": "Logit Bias" }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Seed" }, "response_format": { "anyOf": [ { "$ref": "#/components/schemas/ChatCompletionRequestResponseFormat" }, { "type": "null" } ] }, "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model", "description": 
"The model to use for generating completions." }, "n": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "N", "default": 1 }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "User" }, "top_k": { "type": "integer", "minimum": 0.0, "title": "Top K", "description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.", "default": 40 }, "repeat_penalty": { "type": "number", "minimum": 0.0, "title": "Repeat Penalty", "description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.", "default": 1.1 }, "logit_bias_type": { "anyOf": [ { "type": "string", "enum": ["input_ids", "tokens"] }, { "type": "null" } ], "title": "Logit Bias Type" }, "mirostat_mode": { "type": "integer", "maximum": 2.0, "minimum": 0.0, "title": "Mirostat Mode", "description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)", "default": 0 }, "mirostat_tau": { "type": "number", "maximum": 10.0, "minimum": 0.0, "title": "Mirostat Tau", "description": "Mirostat target entropy, i.e. 
the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text", "default": 5.0 }, "mirostat_eta": { "type": "number", "maximum": 1.0, "minimum": 0.001, "title": "Mirostat Eta", "description": "Mirostat learning rate", "default": 0.1 }, "grammar": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Grammar" } }, "type": "object", "title": "CreateChatCompletionRequest", "examples": [ { "messages": [ { "content": "You are a helpful assistant.", "role": "system" }, { "content": "What is the capital of France?", "role": "user" } ] } ] }, "CreateChatCompletionResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "object": { "type": "string", "const": "chat.completion", "title": "Object" }, "created": { "type": "integer", "title": "Created" }, "model": { "type": "string", "title": "Model" }, "choices": { "items": { "$ref": "#/components/schemas/ChatCompletionResponseChoice" }, "type": "array", "title": "Choices" }, "usage": { "$ref": "#/components/schemas/CompletionUsage" } }, "type": "object", "required": ["id", "object", "created", "model", "choices", "usage"], "title": "CreateChatCompletionResponse" }, "CreateCompletionRequest": { "properties": { "prompt": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" } ], "title": "Prompt", "description": "The prompt to generate completions for.", "default": "" }, "suffix": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Suffix", "description": "A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots." }, "max_tokens": { "anyOf": [ { "type": "integer", "minimum": 0.0 }, { "type": "null" } ], "title": "Max Tokens", "description": "The maximum number of tokens to generate.", "default": 16 }, "min_tokens": { "type": "integer", "minimum": 0.0, "title": "Min Tokens", "description": "The minimum number of tokens to generate. 
It may return fewer tokens if another condition is met (e.g. max_tokens, stop).", "default": 0 }, "temperature": { "type": "number", "title": "Temperature", "description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.", "default": 0.8 }, "top_p": { "type": "number", "maximum": 1.0, "minimum": 0.0, "title": "Top P", "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.", "default": 0.95 }, "min_p": { "type": "number", "maximum": 1.0, "minimum": 0.0, "title": "Min P", "description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. 
For example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out.", "default": 0.05 }, "echo": { "type": "boolean", "title": "Echo", "description": "Whether to echo the prompt in the generated text. Useful for chatbots.", "default": false }, "stop": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], "title": "Stop", "description": "A list of tokens at which to stop generation. If None, no stop tokens are used." }, "stream": { "type": "boolean", "title": "Stream", "description": "Whether to stream the results as they are generated. Useful for chatbots.", "default": false }, "stream_options": { "anyOf": [ { "$ref": "#/components/schemas/StreamOptions" }, { "type": "null" } ], "description": "Options for streaming response. Only set this when you set stream: true." }, "logprobs": { "anyOf": [ { "type": "integer", "minimum": 0.0 }, { "type": "null" } ], "title": "Logprobs", "description": "The number of logprobs to generate. If None, no logprobs are generated." 
}, "presence_penalty": { "anyOf": [ { "type": "number", "maximum": 2.0, "minimum": -2.0 }, { "type": "null" } ], "title": "Presence Penalty", "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", "default": 0.0 }, "frequency_penalty": { "anyOf": [ { "type": "number", "maximum": 2.0, "minimum": -2.0 }, { "type": "null" } ], "title": "Frequency Penalty", "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", "default": 0.0 }, "logit_bias": { "anyOf": [ { "additionalProperties": { "type": "number" }, "type": "object" }, { "type": "null" } ], "title": "Logit Bias" }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Seed" }, "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model", "description": "The model to use for generating completions." }, "n": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "N", "default": 1 }, "best_of": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Best Of", "default": 1 }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "User" }, "top_k": { "type": "integer", "minimum": 0.0, "title": "Top K", "description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. 
A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.", "default": 40 }, "repeat_penalty": { "type": "number", "minimum": 0.0, "title": "Repeat Penalty", "description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.", "default": 1.1 }, "logit_bias_type": { "anyOf": [ { "type": "string", "enum": ["input_ids", "tokens"] }, { "type": "null" } ], "title": "Logit Bias Type" }, "mirostat_mode": { "type": "integer", "maximum": 2.0, "minimum": 0.0, "title": "Mirostat Mode", "description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)", "default": 0 }, "mirostat_tau": { "type": "number", "maximum": 10.0, "minimum": 0.0, "title": "Mirostat Tau", "description": "Mirostat target entropy, i.e. 
the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text", "default": 5.0 }, "mirostat_eta": { "type": "number", "maximum": 1.0, "minimum": 0.001, "title": "Mirostat Eta", "description": "Mirostat learning rate", "default": 0.1 }, "grammar": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Grammar" } }, "type": "object", "title": "CreateCompletionRequest", "examples": [ { "prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n", "stop": ["\n", "###"] } ] }, "CreateCompletionResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "object": { "type": "string", "const": "text_completion", "title": "Object" }, "created": { "type": "integer", "title": "Created" }, "model": { "type": "string", "title": "Model" }, "choices": { "items": { "$ref": "#/components/schemas/CompletionChoice" }, "type": "array", "title": "Choices" }, "usage": { "$ref": "#/components/schemas/CompletionUsage" } }, "type": "object", "required": ["id", "object", "created", "model", "choices"], "title": "CreateCompletionResponse" }, "CreateEmbeddingRequest": { "properties": { "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model", "description": "The model to use for generating completions." }, "input": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" } ], "title": "Input", "description": "The input to embed." }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "User" } }, "type": "object", "required": ["input"], "title": "CreateEmbeddingRequest", "examples": [ { "input": "The food was delicious and the waiter..." } ] }, "DetokenizeInputRequest": { "properties": { "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model", "description": "The model to use for generating completions." 
}, "tokens": { "items": { "type": "integer" }, "type": "array", "title": "Tokens", "description": "A list of toekns to detokenize." } }, "type": "object", "required": ["tokens"], "title": "DetokenizeInputRequest", "example": [ { "tokens": [123, 321, 222] } ] }, "DetokenizeInputResponse": { "properties": { "text": { "type": "string", "title": "Text", "description": "The detokenized text." } }, "type": "object", "required": ["text"], "title": "DetokenizeInputResponse", "example": { "text": "How many tokens in this query?" } }, "HTTPValidationError": { "properties": { "detail": { "items": { "$ref": "#/components/schemas/ValidationError" }, "type": "array", "title": "Detail" } }, "type": "object", "title": "HTTPValidationError" }, "ModelData": { "properties": { "id": { "type": "string", "title": "Id" }, "object": { "type": "string", "const": "model", "title": "Object" }, "owned_by": { "type": "string", "title": "Owned By" }, "permissions": { "items": { "type": "string" }, "type": "array", "title": "Permissions" } }, "type": "object", "required": ["id", "object", "owned_by", "permissions"], "title": "ModelData" }, "ModelList": { "properties": { "object": { "type": "string", "const": "list", "title": "Object" }, "data": { "items": { "$ref": "#/components/schemas/ModelData" }, "type": "array", "title": "Data" } }, "type": "object", "required": ["object", "data"], "title": "ModelList" }, "StreamOptions": { "properties": { "include_usage": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "title": "Include Usage" } }, "type": "object", "required": ["include_usage"], "title": "StreamOptions" }, "TokenizeInputCountResponse": { "properties": { "count": { "type": "integer", "title": "Count", "description": "The number of tokens in the input." 
} }, "type": "object", "required": ["count"], "title": "TokenizeInputCountResponse", "example": { "count": 5 } }, "TokenizeInputRequest": { "properties": { "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model", "description": "The model to use for generating completions." }, "input": { "type": "string", "title": "Input", "description": "The input to tokenize." } }, "type": "object", "required": ["input"], "title": "TokenizeInputRequest", "examples": [ { "input": "How many tokens in this query?" } ] }, "TokenizeInputResponse": { "properties": { "tokens": { "items": { "type": "integer" }, "type": "array", "title": "Tokens", "description": "A list of tokens." } }, "type": "object", "required": ["tokens"], "title": "TokenizeInputResponse", "example": { "tokens": [123, 321, 222] } }, "ValidationError": { "properties": { "loc": { "items": { "anyOf": [ { "type": "string" }, { "type": "integer" } ] }, "type": "array", "title": "Location" }, "msg": { "type": "string", "title": "Message" }, "type": { "type": "string", "title": "Error Type" } }, "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError" } }, "securitySchemes": { "HTTPBearer": { "type": "http", "scheme": "bearer" } } } } ================================================ FILE: packages/backend/src/extension.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

// Unit tests for the extension entry points: Studio is fully mocked, so these
// tests only verify that activate()/deactivate() delegate to the Studio instance.
import { beforeEach, expect, test, vi } from 'vitest';
import type { ExtensionContext } from '@podman-desktop/api';
import { activate, deactivate } from './extension';
import { Studio } from './studio';

vi.mock('./studio');

beforeEach(() => {
  vi.clearAllMocks();
});

test('check we call activate method on studio instance', async () => {
  const fakeContext = {} as unknown as ExtensionContext;
  await activate(fakeContext);
  // expect the activate method to be called on the studio mock
  expect(Studio.prototype.activate).toBeCalledTimes(1);
  // no call on deactivate
  expect(Studio.prototype.deactivate).not.toBeCalled();
});

test('check we call deactivate method on studio instance ', async () => {
  await deactivate();
  // expect the deactivate method to be called on the studio mock
  expect(Studio.prototype.deactivate).toBeCalledTimes(1);
  // no call on activate
  expect(Studio.prototype.activate).not.toBeCalled();
});

================================================ FILE: packages/backend/src/extension.ts ================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ExtensionContext } from '@podman-desktop/api'; import { Studio } from './studio'; let studio: Studio | undefined; export async function activate(extensionContext: ExtensionContext): Promise { studio = new Studio(extensionContext); await studio?.activate(); } export async function deactivate(): Promise { await studio?.deactivate(); } ================================================ FILE: packages/backend/src/instructlab-api-impl.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import type { InstructlabAPI } from '@shared/InstructlabAPI';
import type { InstructlabManager } from './managers/instructlab/instructlabManager';
import type { InstructlabSession } from '@shared/models/instructlab/IInstructlabSession';
import type { InstructlabContainerConfiguration } from '@shared/models/instructlab/IInstructlabContainerConfiguration';
import { navigation } from '@podman-desktop/api';

// RPC facade exposing InstructLab operations to the frontend; all real work is
// delegated to the injected InstructlabManager.
// NOTE(review): the Promise return types below are missing their type
// arguments (bare `Promise`) — the generics appear to have been stripped
// during extraction (e.g. Promise<InstructlabSession[]>, Promise<void>);
// confirm the exact signatures against the InstructlabAPI interface.
export class InstructlabApiImpl implements InstructlabAPI {
  constructor(private instructlabManager: InstructlabManager) {}

  // NOTE(review): method name contains a typo ("Isntructlab") — it is part of
  // the shared InstructlabAPI contract, so renaming needs a coordinated change.
  async getIsntructlabSessions(): Promise {
    return this.instructlabManager.getSessions();
  }

  // Starts creation of an InstructLab container with the given configuration.
  requestCreateInstructlabContainer(config: InstructlabContainerConfiguration): Promise {
    return this.instructlabManager.requestCreateInstructlabContainer(config);
  }

  // Opens the Podman Desktop terminal view for the given container.
  routeToInstructLabContainerTerminal(containerId: string): Promise {
    return navigation.navigateToContainerTerminal(containerId);
  }

  // Resolves the id of the InstructLab container tracked by the manager
  // (presumably undefined when none is running — TODO confirm).
  getInstructlabContainerId(): Promise {
    return this.instructlabManager.getInstructLabContainer();
  }
}

================================================ FILE: packages/backend/src/llama-stack-api-impl.ts ================================================
/**********************************************************************
 * Copyright (C) 2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { navigation } from '@podman-desktop/api';
import type { LlamaStackAPI } from '@shared/LlamaStackAPI';
import type { LlamaStackContainerConfiguration } from '@shared/models/llama-stack/LlamaStackContainerConfiguration';
import type { LlamaStackManager } from './managers/llama-stack/llamaStackManager';
import type { LlamaStackContainers } from '@shared/models/llama-stack/LlamaStackContainerInfo';

// RPC facade exposing Llama Stack operations to the frontend; delegates to the
// injected LlamaStackManager.
// NOTE(review): the Promise return types below are missing their type
// arguments (bare `Promise`) — generics appear stripped during extraction;
// confirm the exact signatures against the LlamaStackAPI interface (the
// imported LlamaStackContainers type is presumably the payload of
// getLlamaStackContainersInfo).
export class LlamaStackApiImpl implements LlamaStackAPI {
  constructor(private llamaStackManager: LlamaStackManager) {}

  // NOTE(review): method name is garbled ("requestcreate…Containerss") — it is
  // part of the shared LlamaStackAPI contract, so renaming needs a coordinated
  // change across interface, manager and frontend.
  requestcreateLlamaStackContainerss(config: LlamaStackContainerConfiguration): Promise {
    return this.llamaStackManager.requestcreateLlamaStackContainerss(config);
  }

  // Opens the Podman Desktop terminal view for the given container.
  routeToLlamaStackContainerTerminal(containerId: string): Promise {
    return navigation.navigateToContainerTerminal(containerId);
  }

  getLlamaStackContainersInfo(): Promise {
    return this.llamaStackManager.getLlamaStackContainers();
  }
}

================================================ FILE: packages/backend/src/managers/GPUManager.spec.ts ================================================
/**********************************************************************
 * Copyright (C) 2024-2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
// Tests for GPUManager.collectGPUs(): the systeminformation `graphics()` call
// is mocked, so these verify only the controller -> IGPUInfo mapping.
import { expect, test, vi, beforeEach } from 'vitest';
import { GPUManager } from './GPUManager';
import { graphics, type Systeminformation } from 'systeminformation';
import { GPUVendor } from '@shared/models/IGPUInfo';
import type { RpcExtension } from '@shared/messages/MessageProxy';

vi.mock('../utils/inferenceUtils', () => ({
  getProviderContainerConnection: vi.fn(),
  getImageInfo: vi.fn(),
}));

vi.mock('@podman-desktop/api', async () => {
  return {
    env: {
      isWindows: false,
    },
  };
});

vi.mock('systeminformation', () => ({
  graphics: vi.fn(),
}));

const rpcExtensionMock = {
  fire: vi.fn(),
} as unknown as RpcExtension;

beforeEach(() => {
  vi.resetAllMocks();
  vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
});

test('post constructor should have no items', () => {
  const manager = new GPUManager(rpcExtensionMock);
  expect(manager.getAll().length).toBe(0);
});

test('no controller should return empty array', async () => {
  vi.mocked(graphics).mockResolvedValue({
    controllers: [],
    displays: [],
  });
  const manager = new GPUManager(rpcExtensionMock);
  expect(await manager.collectGPUs()).toHaveLength(0);
});

test('intel controller should return intel vendor', async () => {
  vi.mocked(graphics).mockResolvedValue({
    controllers: [
      {
        vendor: 'Intel Corporation',
        model: 'intel model',
        vram: 1024,
      } as unknown as Systeminformation.GraphicsControllerData,
    ],
    displays: [],
  });
  const manager = new GPUManager(rpcExtensionMock);
  expect(await manager.collectGPUs()).toStrictEqual([
    {
      vendor: GPUVendor.INTEL,
      model: 'intel model',
      vram: 1024,
    },
  ]);
});

// NOTE(review): test name says "intel vendor" but the assertion below expects
// GPUVendor.NVIDIA — the name is a copy-paste leftover (left unchanged here,
// as a test name is a runtime string).
test('NVIDIA controller should return intel vendor', async () => {
  vi.mocked(graphics).mockResolvedValue({
    controllers: [
      {
        vendor: 'NVIDIA',
        model: 'NVIDIA GeForce GTX 1060 6GB',
        vram: 6144,
      } as unknown as Systeminformation.GraphicsControllerData,
    ],
    displays: [],
  });
  const manager = new GPUManager(rpcExtensionMock);
  expect(await manager.collectGPUs()).toStrictEqual([
    {
      vendor: GPUVendor.NVIDIA,
      model: 'NVIDIA GeForce GTX 1060 6GB',
      vram: 6144,
    },
  ]);
});

test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
  vi.mocked(graphics).mockResolvedValue({
    controllers: [
      {
        vendor: 'NVIDIA Corporation',
        model: 'NVIDIA GeForce GTX 1060 6GB',
        vram: 6144,
      } as unknown as Systeminformation.GraphicsControllerData,
    ],
    displays: [],
  });
  const manager = new GPUManager(rpcExtensionMock);
  expect(await manager.collectGPUs()).toStrictEqual([
    {
      vendor: GPUVendor.NVIDIA,
      model: 'NVIDIA GeForce GTX 1060 6GB',
      vram: 6144,
    },
  ]);
});

================================================ FILE: packages/backend/src/managers/GPUManager.ts ================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { type Disposable } from '@podman-desktop/api'; import { GPUVendor, type IGPUInfo } from '@shared/models/IGPUInfo'; import { Publisher } from '../utils/Publisher'; import { graphics } from 'systeminformation'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_GPUS_UPDATE } from '@shared/Messages'; /** * @experimental */ export class GPUManager extends Publisher implements Disposable { #gpus: IGPUInfo[]; constructor(rpcExtension: RpcExtension) { super(rpcExtension, MSG_GPUS_UPDATE, () => this.getAll()); // init properties this.#gpus = []; } dispose(): void {} getAll(): IGPUInfo[] { return this.#gpus; } async collectGPUs(): Promise { const { controllers } = await graphics(); return controllers.map(controller => ({ vendor: this.getVendor(controller.vendor), model: controller.model, vram: controller.vram ?? undefined, })); } protected getVendor(raw: string): GPUVendor { switch (raw) { case 'Intel Corporation': return GPUVendor.INTEL; case 'NVIDIA': case 'NVIDIA Corporation': return GPUVendor.NVIDIA; case 'Apple': return GPUVendor.APPLE; default: return GPUVendor.UNKNOWN; } } } ================================================ FILE: packages/backend/src/managers/SnippetManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, vi } from 'vitest'; import { SnippetManager } from './SnippetManager'; import type { TelemetryLogger } from '@podman-desktop/api'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_SUPPORTED_LANGUAGES_UPDATE } from '@shared/Messages'; const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension; const telemetryMock = { logUsage: vi.fn(), logError: vi.fn(), } as unknown as TelemetryLogger; beforeEach(() => { vi.resetAllMocks(); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); }); test('expect init to notify webview', () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_SUPPORTED_LANGUAGES_UPDATE, manager.getLanguageList()); }); test('expect postman-code-generators to have many languages available.', () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); expect(manager.getLanguageList().length).toBeGreaterThan(0); }); test('expect postman-code-generators to have nodejs supported.', () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); const languages = manager.getLanguageList(); const nodejs = languages.find(language => language.key === 'nodejs'); expect(nodejs).toBeDefined(); expect(nodejs?.variants.length).toBeGreaterThan(0); const native = nodejs?.variants.find(variant => variant.key === 'Request'); expect(native).toBeDefined(); }); test('expect postman-code-generators to generate proper nodejs native code', async () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); const snippet = await manager.generate( { url: 'http://localhost:8080', }, 'nodejs', 'Request', 
); expect(snippet).toBe(`var request = require('request'); var options = { 'method': 'GET', 'url': 'http://localhost:8080', 'headers': { } }; request(options, function (error, response) { if (error) throw new Error(error); console.log(response.body); }); `); }); test('expect snippet manager to have Quarkus Langchain4J supported.', () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); const languages = manager.getLanguageList(); const java = languages.find(language => language.key === 'java'); expect(java).toBeDefined(); expect(java?.variants.length).toBeGreaterThan(0); const quarkus_langchain4j = java?.variants.find(variant => variant.key === 'Quarkus Langchain4J'); expect(quarkus_langchain4j).toBeDefined(); }); test('expect new variant to replace existing one if same name', () => { const manager = new SnippetManager(rpcExtensionMock, telemetryMock); manager.init(); const languages = manager.getLanguageList(); const java = languages.find(language => language.key === 'java'); expect(java).toBeDefined(); expect(java?.variants.length).toBeGreaterThan(0); if (!java) throw new Error('undefined java'); const oldVariantsNumber = java.variants.length; manager.addVariant('java', java.variants[0].key, vi.fn()); const languages_updated = manager.getLanguageList(); const java_updated = languages_updated.find(language => language.key === 'java'); expect(java_updated).toBeDefined(); expect(java_updated?.variants.length).equals(oldVariantsNumber); }); ================================================ FILE: packages/backend/src/managers/SnippetManager.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { Disposable, TelemetryLogger } from '@podman-desktop/api'; import { getLanguageList, convert, type Language } from 'postman-code-generators'; import { Request } from 'postman-collection'; import { Publisher } from '../utils/Publisher'; import type { RequestOptions } from '@shared/models/RequestOptions'; import { quarkusLangchain4Jgenerator } from './snippets/quarkus-snippet'; import { javaOkHttpGenerator } from './snippets/java-okhttp-snippet'; import { pythonLangChainGenerator } from './snippets/python-langchain-snippet'; import { MSG_SUPPORTED_LANGUAGES_UPDATE } from '@shared/Messages'; import type { RpcExtension } from '@shared/messages/MessageProxy'; type Generator = (requestOptions: RequestOptions) => Promise; export class SnippetManager extends Publisher implements Disposable { #languages: Language[]; #additionalGenerators: Map; constructor( rpcExtension: RpcExtension, private telemetry: TelemetryLogger, ) { super(rpcExtension, MSG_SUPPORTED_LANGUAGES_UPDATE, () => this.getLanguageList()); this.#languages = []; this.#additionalGenerators = new Map(); } addVariant(key: string, variant: string, generator: Generator): void { const original = this.#languages; const language = original.find((lang: Language) => lang.key === key); if (language) { if (!language.variants.find(v => v.key === variant)) { language.variants.push({ key: variant }); } this.#additionalGenerators.set(`${key}/${variant}`, generator); } } getLanguageList(): Language[] { 
return this.#languages; } async generate(requestOptions: RequestOptions, language: string, variant: string): Promise { this.telemetry.logUsage('snippet.generate', { language: language, variant: variant }); const generator = this.#additionalGenerators.get(`${language}/${variant}`); if (generator) { return generator(requestOptions); } return new Promise((resolve, reject) => { const request = new Request(requestOptions); convert(language, variant, request, {}, (error: unknown, snippet: string | undefined) => { if (error) { reject(error); return; } else if (snippet === undefined) { throw new Error('undefined snippet'); } resolve(snippet); }); }); } init(): void { this.#languages = getLanguageList(); this.addVariant('java', 'Quarkus Langchain4J', quarkusLangchain4Jgenerator); this.addVariant('java', 'OkHttp', javaOkHttpGenerator); this.addVariant('python', 'Python LangChain', pythonLangChainGenerator); // Notify the publisher this.notify(); } dispose(): void {} } ================================================ FILE: packages/backend/src/managers/TaskRunner.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, vi } from 'vitest'; import type { TaskRegistry } from '../registries/TaskRegistry'; import { TaskRunner } from './TaskRunner'; import type { TaskRunnerTools } from '../models/TaskRunner'; import type { Task } from '@shared/models/ITask'; const taskRegistry = { createTask: vi.fn(), updateTask: vi.fn(), getTasksByLabels: vi.fn(), } as unknown as TaskRegistry; const runner = vi.fn<(tools: TaskRunnerTools) => Promise>(); let taskRunner: TaskRunner; beforeEach(() => { vi.resetAllMocks(); taskRunner = new TaskRunner(taskRegistry); }); test('runner terminates with no successLabel', async () => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 'loading', }); runner.mockResolvedValue(); const labels = { label1: 'value1', label2: 'value2', }; await taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', errorMsg: err => `an error: ${err}`, }, runner, ); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Loading...', state: 'success', }); }); test('runner terminates with successLabel', async () => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 'loading', }); runner.mockResolvedValue(); const labels = { label1: 'value1', label2: 'value2', }; await taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', successLabel: 'Success!!', errorMsg: err => `an error: ${err}`, }, runner, ); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Success!!', state: 'success', }); }); test('runner throws with no errorLabel', async () => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 
'loading', }); runner.mockRejectedValue('something goes wrong'); const labels = { label1: 'value1', label2: 'value2', }; await expect(() => taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', errorMsg: err => `an error: ${err}`, }, runner, ), ).rejects.toThrow(); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Loading...', state: 'error', error: 'an error: something goes wrong', }); }); test('runner throws with errorLabel', async () => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 'loading', }); runner.mockRejectedValue('something goes wrong'); const labels = { label1: 'value1', label2: 'value2', }; await expect(() => taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', errorLabel: 'Failed :(', errorMsg: err => `an error: ${err}`, }, runner, ), ).rejects.toThrow(); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Failed :(', state: 'error', error: 'an error: something goes wrong', }); }); test('updateLabels', async () => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 'loading', }); runner.mockImplementation(async ({ updateLabels }) => { updateLabels(labels => ({ ...labels, newLabel: 'newValue' })); }); const labels = { label1: 'value1', label2: 'value2', }; await taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', errorMsg: err => `an error: ${err}`, }, runner, ); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Loading...', state: 'success', labels: { label1: 'value1', label2: 'value2', newLabel: 'newValue', }, }); expect(taskRegistry.updateTask).toHaveBeenCalledWith({ id: 'task1', name: 'Loading...', state: 
'success', labels: { label1: 'value1', label2: 'value2', newLabel: 'newValue', }, }); }); test.each<{ failFast: boolean }>([ { failFast: true, }, { failFast: false, }, ])('failFastSubtasks $failFast', async ({ failFast }) => { vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'task1', name: 'Loading...', state: 'loading', }); const otherTasks: Task[] = [ { id: 'subtask1', name: 'Sub task 1', state: 'loading', }, { id: 'subtask2', name: 'Sub task 2', state: 'loading', }, { id: 'subtask3', name: 'Sub task 3', state: 'error', }, ]; vi.mocked(taskRegistry.getTasksByLabels).mockReturnValue(otherTasks); runner.mockRejectedValue('something goes wrong'); const labels = { label1: 'value1', label2: 'value2', }; await expect(() => taskRunner.runAsTask( labels, { loadingLabel: 'Loading...', errorMsg: err => `an error: ${err}`, failFastSubtasks: failFast, }, runner, ), ).rejects.toThrow(); expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels); if (failFast) { expect(taskRegistry.updateTask).toHaveBeenCalledTimes(3); expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(1, { ...otherTasks[0], state: 'error' }); expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(2, { ...otherTasks[1], state: 'error' }); expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(3, { id: 'task1', name: 'Loading...', state: 'error', error: 'an error: something goes wrong', }); } else { expect(taskRegistry.updateTask).toHaveBeenCalledExactlyOnceWith({ id: 'task1', name: 'Loading...', state: 'error', error: 'an error: something goes wrong', }); } }); ================================================ FILE: packages/backend/src/managers/TaskRunner.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RunAsTaskOptions, TaskRunnerTools } from '../models/TaskRunner'; import type { TaskRegistry } from '../registries/TaskRegistry'; export class TaskRunner { constructor(private taskRegistry: TaskRegistry) {} async runAsTask( labels: Record, options: RunAsTaskOptions, run: (tools: TaskRunnerTools) => Promise, ): Promise { const tools = { updateLabels: (f: (labels: Record) => Record): void => { task.labels = f(labels); this.taskRegistry.updateTask(task); }, }; const task = this.taskRegistry.createTask(options.loadingLabel, 'loading', labels); try { const result = await run(tools); task.state = 'success'; if (options.successLabel) { task.name = options.successLabel; } return result; } catch (err: unknown) { task.state = 'error'; task.error = options.errorMsg(err); if (options.errorLabel) { task.name = options.errorLabel; } if (options.failFastSubtasks) { this.failFastSubtasks(labels); } throw err; } finally { task.progress = undefined; this.taskRegistry.updateTask(task); } } private failFastSubtasks(labels: Record): void { const tasks = this.taskRegistry.getTasksByLabels(labels); // Filter the one no in loading state tasks .filter(t => t.state === 'loading') .forEach(t => { this.taskRegistry.updateTask({ ...t, state: 'error', }); }); } } ================================================ FILE: packages/backend/src/managers/apiServer.spec.ts ================================================ 
/********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ /* eslint-disable sonarjs/no-nested-functions */ import { afterEach, assert, beforeEach, describe, expect, test, vi } from 'vitest'; import { ApiServer, PREFERENCE_RANDOM_PORT } from './apiServer'; import request from 'supertest'; import type * as podmanDesktopApi from '@podman-desktop/api'; import path from 'node:path'; import type { Server } from 'node:http'; import type { ModelsManager } from './modelsManager'; import type { EventEmitter } from 'node:events'; import { once } from 'node:events'; import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry'; import type { AddressInfo } from 'node:net'; import type { CatalogManager } from './catalogManager'; import type { Downloader } from '../utils/downloader'; import type { ProgressEvent } from '../models/baseEvent'; import type { InferenceManager } from './inference/inferenceManager'; import type { ContainerHealthy, ContainerRegistry } from '../registries/ContainerRegistry'; import type { InferenceServer } from '@shared/models/IInference'; import OpenAI from 'openai'; import type { ChatCompletion, ChatCompletionChunk } from 'openai/resources'; import { Stream } from 'openai/streaming'; vi.mock('openai', () => { const OpenAI = vi.fn(); 
OpenAI.prototype = {
    chat: {
      completions: {
        create: vi.fn(),
      },
    },
  };
  return { default: OpenAI };
});

// Subclass only to widen getListener() visibility so tests can reach the raw http.Server.
class TestApiServer extends ApiServer {
  public override getListener(): Server | undefined {
    return super.getListener();
  }
}

const extensionContext = {} as unknown as podmanDesktopApi.ExtensionContext;

let server: TestApiServer;

// Hand-built mock collaborators; only the methods the ApiServer routes touch are stubbed.
const modelsManager = {
  getModelsInfo: vi.fn(),
  isModelOnDisk: vi.fn(),
  createDownloader: vi.fn(),
  getLocalModelsFromDisk: vi.fn(),
  sendModelsInfo: vi.fn(),
} as unknown as ModelsManager;

const catalogManager = {
  getModelByName: vi.fn(),
} as unknown as CatalogManager;

const inferenceManager = {
  getServers: vi.fn(),
  createInferenceServer: vi.fn(),
  startInferenceServer: vi.fn(),
} as unknown as InferenceManager;

// PREFERENCE_RANDOM_PORT makes the server pick a free random port in each test run.
const configurationRegistry = {
  getExtensionConfiguration: () => {
    return {
      apiPort: PREFERENCE_RANDOM_PORT,
    };
  },
} as unknown as ConfigurationRegistry;

const containerRegistry = {
  onHealthyContainerEvent: vi.fn(),
} as unknown as ContainerRegistry;

// Fresh server per test; spec/package paths are redirected to the repository copies.
beforeEach(async () => {
  vi.clearAllMocks();
  server = new TestApiServer(
    extensionContext,
    modelsManager,
    catalogManager,
    inferenceManager,
    configurationRegistry,
    containerRegistry,
  );
  vi.spyOn(server, 'getSpecFile').mockReturnValue(path.join(__dirname, '../../../../api/openapi.yaml'));
  vi.spyOn(server, 'getPackageFile').mockReturnValue(path.join(__dirname, '../../../../package.json'));
  await server.init();
  await new Promise(resolve => setTimeout(resolve, 0)); // wait for random port to be set
});

// Tear the listener down fully before the next test so ports are released.
afterEach(async () => {
  server.dispose();
  await once(server.getListener() as EventEmitter, 'close');
});

test('/spec endpoint', async () => {
  expect(server.getListener()).toBeDefined();
  const res = await request(server.getListener()!)
.get('/spec') .expect(200) .expect('Content-Type', 'application/yaml; charset=utf-8'); expect(res.text).toMatch(/^openapi:/); }); test('/spec endpoint when spec file is not found', async () => { expect(server.getListener()).toBeDefined(); vi.spyOn(server, 'getSpecFile').mockReturnValue(path.join(__dirname, '../../../../api/openapi-notfound.yaml')); const res = await request(server.getListener()!).get('/spec').expect(500); expect(res.body.message).toEqual('unable to get spec'); }); test('/spec endpoint when getting spec file fails', async () => { expect(server.getListener()).toBeDefined(); vi.spyOn(server, 'getSpecFile').mockImplementation(() => { throw new Error('an error getting spec file'); }); const res = await request(server.getListener()!).get('/spec').expect(500); expect(res.body.message).toEqual('unable to get spec'); expect(res.body.errors[0]).toEqual('an error getting spec file'); }); test('/api/version endpoint', async () => { expect(server.getListener()).toBeDefined(); const res = await request(server.getListener()!) 
.get('/api/version') .expect(200) .expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.version).toBeDefined(); }); test('/api/version endpoint when package.json file is not found', async () => { expect(server.getListener()).toBeDefined(); vi.spyOn(server, 'getPackageFile').mockReturnValue(path.join(__dirname, '../../../../package-notfound.json')); const res = await request(server.getListener()!).get('/api/version').expect(500); expect(res.body.message).toEqual('unable to get version'); }); test('/api/version endpoint when getting package.json file fails', async () => { expect(server.getListener()).toBeDefined(); vi.spyOn(server, 'getPackageFile').mockImplementation(() => { throw new Error('an error getting package file'); }); const res = await request(server.getListener()!).get('/api/version').expect(500); expect(res.body.message).toEqual('unable to get version'); expect(res.body.errors[0]).toEqual('an error getting package file'); }); test('/api/version endpoint with unexpected param', async () => { expect(server.getListener()).toBeDefined(); const res = await request(server.getListener()!).get('/api/version?wrongParam').expect(400); expect(res.body.message).toEqual(`Unknown query parameter 'wrongParam'`); }); test('/api/wrongEndpoint', async () => { expect(server.getListener()).toBeDefined(); const res = await request(server.getListener()!).get('/api/wrongEndpoint').expect(404); expect(res.body.message).toEqual('not found'); }); test('/', async () => { expect(server.getListener()).toBeDefined(); await request(server.getListener()!).get('/').expect(200); }); test('/api/tags', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(modelsManager.getModelsInfo).mockReturnValue([]); await request(server.getListener()!).get('/api/tags').expect(200); }); test('/api/tags returns error', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(modelsManager.getModelsInfo).mockRejectedValue({}); const res = await 
request(server.getListener()!).get('/api/tags').expect(500); expect(res.body.message).toEqual('unable to get models'); }); test('/api/tags returns ok', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(modelsManager.getModelsInfo).mockReturnValue([ { id: 'modelId', name: 'model-name', description: 'a description', }, ]); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true); const res = await request(server.getListener()!).get('/api/tags').expect(200); expect(res.body).toBeDefined(); expect(res.body.models).toBeDefined(); expect(res.body.models[0]).toMatchObject({ name: 'model-name', model: 'model-name', }); }); test('/api-docs/9000 returns swagger UI', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(modelsManager.getModelsInfo).mockRejectedValue({}); const listener = server.getListener(); if (!listener) { assert.fail('listener is not defined'); } const response = await request(listener).get('/api-docs/9000/').expect(200); expect(response.status).toBe(200); // Ensure it returns the Swagger UI page expect(response.text).toContain('Swagger UI'); }); test('verify listening on localhost', async () => { expect(server.getListener()).toBeDefined(); expect((server.getListener()?.address() as AddressInfo).address).toEqual('0.0.0.0'); }); test('/api/pull returns an error if no body is passed', async () => { expect(server.getListener()).toBeDefined(); await request(server.getListener()!).post('/api/pull').expect(415); }); describe.each([undefined, true, false])('/api/pull endpoint, stream is %o', stream => { test('/api/pull returns an error if the model is not known', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockImplementation(() => { throw new Error('model unknown'); }); const req = request(server.getListener()!).post('/api/pull').send({ model: 'unknown-model-name', stream }); if (stream === false) { const res = await req.expect(500).expect('Content-Type', 
'application/json; charset=utf-8'); expect(res.body.error).toEqual('pull model manifest: file does not exist'); } else { const res = await req.expect(200); const lines = res.text.split('\n'); expect(lines.length).toEqual(3); expect(lines[0]).toEqual('{"status":"pulling manifest"}'); expect(lines[1]).toEqual('{"error":"pull model manifest: file does not exist"}'); expect(lines[2]).toEqual(''); } }); test('/api/pull returns success if model already downloaded', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockReturnValue({ id: 'modelId', name: 'model-name', description: 'a description', }); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true); const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.status).toEqual('success'); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(3); expect(lines[0]).toEqual('{"status":"pulling manifest"}'); expect(lines[1]).toEqual('{"status":"success"}'); expect(lines[2]).toEqual(''); } }); test('/api/pull downloads model and returns success', async () => { const getLocalModelsSpy = vi.spyOn(modelsManager, 'getLocalModelsFromDisk').mockResolvedValue(); const sendModelsInfoSpy = vi.spyOn(modelsManager, 'sendModelsInfo').mockResolvedValue(); expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockReturnValue({ id: 'modelId', name: 'model-name', description: 'a description', sha256: '123456', }); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false); vi.mocked(modelsManager.createDownloader).mockReturnValue({ perform: async (_name: string) => {}, onEvent: (listener: (e: ProgressEvent) => void) => { listener({ status: 'progress', id: 'model-name', total: 100000, value: 
100000, }); }, } as unknown as Downloader); const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.status).toEqual('success'); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(4); expect(lines[0]).toEqual('{"status":"pulling manifest"}'); expect(lines[1]).toEqual( '{"status":"pulling 123456","digest":"sha256:123456","total":100000,"completed":100000000}', ); expect(lines[2]).toEqual('{"status":"success"}'); expect(lines[3]).toEqual(''); } expect(getLocalModelsSpy).toHaveBeenCalledTimes(1); expect(sendModelsInfoSpy).toHaveBeenCalledTimes(1); }); test('/api/pull should return an error if an error occurs during download', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockReturnValue({ id: 'modelId', name: 'model-name', description: 'a description', sha256: '123456', }); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false); vi.mocked(modelsManager.createDownloader).mockReturnValue({ perform: async (_name: string) => { await new Promise(resolve => setTimeout(resolve, 0)); // wait for random port to be set throw new Error('an error'); }, onEvent: (listener: (e: ProgressEvent) => void) => { listener({ status: 'progress', id: 'model-name', total: 100000, value: 100000, }); }, } as unknown as Downloader); const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.error).toEqual('Error: an error'); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(4); 
expect(lines[0]).toEqual('{"status":"pulling manifest"}'); expect(lines[1]).toEqual( '{"status":"pulling 123456","digest":"sha256:123456","total":100000,"completed":100000000}', ); expect(lines[2]).toEqual('{"error":"Error: an error"}'); expect(lines[3]).toEqual(''); } }); }); describe.each([undefined, true, false])('stream is %o', stream => { describe.each(['/api/chat', '/api/generate'])('%o endpoint', endpoint => { test('returns an error if the model is not known', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockImplementation(() => { throw new Error('model unknown'); }); const req = request(server.getListener()!).post(endpoint).send({ model: 'unknown-model-name', stream }); if (stream === false) { const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.error).toEqual('chat: model "unknown-model-name" does not exist'); } else { const res = await req.expect(200); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"error":"chat: model \\"unknown-model-name\\" does not exist"}'); expect(lines[1]).toEqual(''); } }); test('returns an error if model is not downloaded', async () => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockReturnValue({ id: 'modelId', name: 'model-name', description: 'a description', }); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false); const req = request(server.getListener()!).post(endpoint).send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body.error).toEqual('chat: model "model-name" not found, try pulling it first'); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"error":"chat: model 
\\"model-name\\" not found, try pulling it first"}'); expect(lines[1]).toEqual(''); } }); }); describe('the model is available', () => { const onHealthyContainerEventEmptyCallback = (): podmanDesktopApi.Disposable => { return { dispose: vi.fn(), }; }; const onHealthyContainerEventNonEmptyCallback = ( fn: (e: ContainerHealthy) => void, ): podmanDesktopApi.Disposable => { setTimeout( () => fn({ id: 'container1', }), 100, ); return { dispose: vi.fn(), }; }; beforeEach(() => { expect(server.getListener()).toBeDefined(); vi.mocked(catalogManager.getModelByName).mockReturnValue({ id: 'modelId1', name: 'model-name', description: 'a description', file: { file: 'a-file-name', path: '/path/to/model-file', }, }); vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true); }); describe('the service is initially not created', async () => { beforeEach(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([]); }); describe('the created service is immediately healthy', () => { beforeEach(() => { vi.mocked(inferenceManager.createInferenceServer).mockImplementation(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([ { models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, status: 'running', health: { Status: 'healthy', }, } as unknown as InferenceServer, ]); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation( onHealthyContainerEventEmptyCallback, ); return 'container1'; }); }); test('/api/generate creates the service and returns that the model is loaded', async () => { const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' }); } else { const res = await 
req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}'); expect(lines[1]).toEqual(''); } expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce(); expect(inferenceManager.createInferenceServer).toHaveBeenCalledOnce(); }); }); describe('the created service is eventually healthy', () => { beforeEach(() => { vi.mocked(inferenceManager.createInferenceServer).mockImplementation(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([ { models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, status: 'starting', } as unknown as InferenceServer, ]); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation( onHealthyContainerEventNonEmptyCallback, ); return 'container1'; }); }); test('/api/generate creates the service and returns that the model is loaded', async () => { const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' }); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}'); expect(lines[1]).toEqual(''); } expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce(); expect(inferenceManager.createInferenceServer).toHaveBeenCalledOnce(); }); }); }); describe('the service is initially created but not started', async () => { beforeEach(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([ { 
models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, status: 'stopped', } as unknown as InferenceServer, ]); }); describe('the started service is immediately healthy', () => { beforeEach(() => { vi.mocked(inferenceManager.startInferenceServer).mockImplementation(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([ { models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, status: 'running', health: { Status: 'healthy', }, } as unknown as InferenceServer, ]); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation( onHealthyContainerEventEmptyCallback, ); }); }); test('/api/generate starts the service and returns that the model is loaded', async () => { const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' }); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}'); expect(lines[1]).toEqual(''); } expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce(); expect(inferenceManager.startInferenceServer).toHaveBeenCalledOnce(); }); }); describe('the started service is eventually healthy', () => { beforeEach(() => { vi.mocked(inferenceManager.startInferenceServer).mockImplementation(async () => { vi.mocked(inferenceManager.getServers).mockReturnValueOnce([ { models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, 
status: 'starting', } as unknown as InferenceServer, ]); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation( onHealthyContainerEventNonEmptyCallback, ); }); }); test('/api/generate starts the service and returns that the model is loaded', async () => { const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' }); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}'); expect(lines[1]).toEqual(''); } expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce(); expect(inferenceManager.startInferenceServer).toHaveBeenCalledOnce(); }); }); }); describe('the service is running', async () => { beforeEach(async () => { vi.mocked(inferenceManager.getServers).mockReturnValue([ { models: [ { id: 'modelId1', name: 'model-name', description: 'model 1', }, ], container: { engineId: 'engine1', containerId: 'container1', }, status: 'running', health: { Status: 'healthy', }, connection: { port: 8080, }, } as unknown as InferenceServer, ]); }); test('/api/generate returns that the model is loaded', async () => { const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream }); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' }); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); expect(lines.length).toEqual(2); 
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}'); expect(lines[1]).toEqual(''); } }); describe.each([ { endpoint: '/api/chat', query: { model: 'model-name', stream, messages: [ { role: 'user', content: 'what is the question?', }, ], }, expectedNonStreamed: { model: 'model-name', message: { role: 'assistant', content: 'that is a good question' }, done: true, done_reason: 'stop', }, expectedStreamed: [ '{"model":"model-name","message":{"role":"assistant","content":"that "},"done":false}', '{"model":"model-name","message":{"role":"assistant","content":"is "},"done":false}', '{"model":"model-name","message":{"role":"assistant","content":"a "},"done":false}', '{"model":"model-name","message":{"role":"assistant","content":"good "},"done":false}', '{"model":"model-name","message":{"role":"assistant","content":"question"},"done":false}', '{"model":"model-name","message":{"role":"assistant","content":"."},"done":true,"done_reason":"stop"}', '', ], }, { endpoint: '/api/generate', query: { model: 'model-name', stream, prompt: 'what is the question?' 
}, expectedNonStreamed: { model: 'model-name', response: 'that is a good question', done: true, done_reason: 'stop', }, expectedStreamed: [ '{"model":"model-name","response":"that ","done":false}', '{"model":"model-name","response":"is ","done":false}', '{"model":"model-name","response":"a ","done":false}', '{"model":"model-name","response":"good ","done":false}', '{"model":"model-name","response":"question","done":false}', '{"model":"model-name","response":".","done":true,"done_reason":"stop"}', '', ], }, ])('%o endpoint', ({ endpoint, query, expectedNonStreamed, expectedStreamed }) => { test('calls the service and replies to the prompt', async () => { if (stream || stream === undefined) { const chunks = [ { choices: [ { delta: { content: 'that ', }, }, ], }, { choices: [ { delta: { content: 'is ', }, }, ], }, { choices: [ { delta: { content: 'a ', }, }, ], }, { choices: [ { delta: { content: 'good ', }, }, ], }, { choices: [ { delta: { content: 'question', }, }, ], }, { choices: [ { delta: { content: '.', }, finish_reason: 'stop', }, ], }, ] as ChatCompletionChunk[]; const asyncIterator = (async function* (): AsyncGenerator< OpenAI.Chat.Completions.ChatCompletionChunk, void, unknown > { for (const chunk of chunks) { yield chunk; } })(); const response = new Stream(() => asyncIterator, new AbortController()); vi.mocked(OpenAI.prototype.chat.completions.create).mockResolvedValue(response); } else { vi.mocked(OpenAI.prototype.chat.completions.create).mockResolvedValue({ id: 'id1', choices: [ { message: { role: 'assistant', content: 'that is a good question', }, }, ], } as unknown as ChatCompletion); } const req = request(server.getListener()!).post(endpoint).send(query); if (stream === false) { const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8'); expect(res.body).toEqual(expectedNonStreamed); } else { const res = await req.expect(200).expect('transfer-encoding', 'chunked'); const lines = res.text.split('\n'); 
expect(lines.length).toEqual(expectedStreamed.length);
          for (const [i, line] of lines.entries()) {
            expect(line).toEqual(expectedStreamed[i]);
          }
        }
      });
    });
  });
});
});

// /api/ps reports currently loaded (running) models, mirroring ollama's `ps` endpoint.
describe('/api/ps', () => {
  // Any throw from the inference manager surfaces as a generic 500.
  test('returns an error if the model is not known', async () => {
    expect(server.getListener()).toBeDefined();
    vi.mocked(inferenceManager.getServers).mockImplementation(() => {
      throw new Error('model unknown');
    });
    const res = await request(server.getListener()!).get('/api/ps').expect(500);
    expect(res.body).toMatchObject({ message: 'unable to ps' });
  });

  test('returns empty result if no servers', async () => {
    expect(server.getListener()).toBeDefined();
    vi.mocked(inferenceManager.getServers).mockReturnValue([]);
    const res = await request(server.getListener()!).get('/api/ps').expect(200);
    expect(res.body).toEqual({ models: [] });
  });

  // Models of a stopped server are not considered loaded.
  test('returns empty result if server is stopped', async () => {
    expect(server.getListener()).toBeDefined();
    vi.mocked(inferenceManager.getServers).mockReturnValue([
      {
        models: [
          {
            id: 'modelId1',
            name: 'model-name',
            description: 'model 1',
          },
        ],
        container: {
          engineId: 'engine1',
          containerId: 'container1',
        },
        status: 'stopped',
      } as unknown as InferenceServer,
    ]);
    const res = await request(server.getListener()!).get('/api/ps').expect(200);
    expect(res.body).toEqual({ models: [] });
  });

  // Running servers are reported; with no sha256 on the model, the digest is the
  // sha256 of the model name (see toDigest in apiServer.ts), and size maps from `memory`.
  test('returns result if server is started', async () => {
    expect(server.getListener()).toBeDefined();
    vi.mocked(inferenceManager.getServers).mockReturnValue([
      {
        models: [
          {
            id: 'modelId1',
            name: 'model-name',
            description: 'model 1',
            memory: 1_000_000,
          },
        ],
        container: {
          engineId: 'engine1',
          containerId: 'container1',
        },
        status: 'running',
      } as unknown as InferenceServer,
    ]);
    const res = await request(server.getListener()!).get('/api/ps').expect(200);
    expect(res.body).toEqual({
      models: [
        {
          name: 'model-name',
          model: 'model-name',
          size: 1_000_000,
          digest: 'b48fa42fa5b28c4363747ec0797532e274650f73004383a3054697137d9d1f30',
        },
      ],
    });
  });
});
================================================ FILE: packages/backend/src/managers/apiServer.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { Disposable } from '@podman-desktop/api'; import type { NextFunction, Request, Response } from 'express'; import express from 'express'; import type { Server } from 'node:http'; import path, { resolve } from 'node:path'; import http from 'node:http'; import { existsSync } from 'node:fs'; import * as podmanDesktopApi from '@podman-desktop/api'; import { readFile } from 'node:fs/promises'; import type { ModelsManager } from './modelsManager'; import type { components } from '../../src-generated/openapi'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry'; import { getFreeRandomPort } from '../utils/ports'; import * as OpenApiValidator from 'express-openapi-validator'; import type { HttpError, OpenApiRequest } from 'express-openapi-validator/dist/framework/types'; import type { CatalogManager } from './catalogManager'; import { isProgressEvent } from '../models/baseEvent'; import type { InferenceManager } from './inference/inferenceManager'; import { withDefaultConfiguration 
} from '../utils/inferenceUtils'; import type { InferenceServer } from '@shared/models/IInference'; import OpenAI from 'openai'; import type { ChatCompletionMessageParam } from 'openai/resources'; import type { ContainerRegistry } from '../registries/ContainerRegistry'; import type { Stream } from 'openai/streaming'; import crypto from 'node:crypto'; import swaggerUi from 'swagger-ui-express'; import { getAbsoluteFSPath } from 'swagger-ui-dist'; import openAiApi from '../assets/openai.json'; const SHOW_API_ERROR_COMMAND = 'ai-lab.show-api-error'; export const PREFERENCE_RANDOM_PORT = 0; type ListModelResponse = components['schemas']['ListModelResponse']; type Message = components['schemas']['Message']; type ProcessModelResponse = components['schemas']['ProcessModelResponse']; interface SwaggerRequest extends Request { swaggerDoc?: { servers: { description: string; url: string }[] }; } function asListModelResponse(model: ModelInfo): ListModelResponse { return { model: model.name, name: model.name, digest: toDigest(model.name, model.sha256), size: model.file?.size, modified_at: model.file?.creation?.toISOString(), details: {}, }; } // ollama expect at least 12 characters for the digest function toDigest(name: string, sha256?: string): string { return sha256 ?? 
crypto.createHash('sha256').update(name).digest('hex'); } function asProcessModelResponse(model: ModelInfo): ProcessModelResponse { return { name: model.name, model: model.name, size: model.memory, digest: toDigest(model.name, model.sha256), }; } const LISTENING_ADDRESS = '0.0.0.0'; interface ChatCompletionOptions { server: InferenceServer; modelInfo: ModelInfo; messages: ChatCompletionMessageParam[]; stream: boolean; onStreamResponse: (response: Stream) => Promise; onNonStreamResponse: (response: OpenAI.Chat.Completions.ChatCompletion) => void; } export class ApiServer implements Disposable { #listener?: Server; constructor( private extensionContext: podmanDesktopApi.ExtensionContext, private modelsManager: ModelsManager, private catalogManager: CatalogManager, private inferenceManager: InferenceManager, private configurationRegistry: ConfigurationRegistry, private containerRegistry: ContainerRegistry, ) {} protected getListener(): Server | undefined { return this.#listener; } async init(): Promise { const app = express(); const router = express.Router(); router.use(express.json()); // validate requests / responses based on openapi spec router.use( OpenApiValidator.middleware({ apiSpec: this.getSpecFile(), validateRequests: true, validateResponses: { onError: (error, body, req) => { console.error(`Response body fails validation: `, error); console.error(`Emitted from:`, req.originalUrl); console.error(body); }, }, }), ); router.use((err: HttpError, _req: OpenApiRequest, res: Response, _next: NextFunction) => { // format errors from validator res.status(err.status || 500).json({ message: err.message, errors: err.errors, }); }); // declare routes router.get('/version', this.getVersion.bind(this)); router.get('/tags', this.getModels.bind(this)); router.post('/pull', this.pullModel.bind(this)); router.post('/show', this.show.bind(this)); router.post('/generate', this.generate.bind(this)); router.post('/chat', this.chat.bind(this)); router.get('/ps', 
this.ps.bind(this)); app.get('/', (_res, res) => res.sendStatus(200)); //required for the ollama client to work against us app.use('/api', router); app.use('/spec', this.getSpec.bind(this)); // get swagger-ui path from dist/swagger-ui const swaggerPath = resolve(getAbsoluteFSPath(), 'swagger-ui'); app.use( '/api-docs/:portNumber', function (req: SwaggerRequest, res: Response, next: NextFunction) { if (req.path.startsWith('/swagger-ui') && req.path !== '/swagger-ui-init.js') { // serve the swagger-ui-dist file from swaggerPath res.sendFile(path.join(swaggerPath, req.path)); return; } const copyOpenAiJson = structuredClone(openAiApi); // Extract the port number from the route parameter and set it as the server URL const portNumber = req.params.portNumber; copyOpenAiJson.servers = [{ description: 'AI Lab Inference Server', url: `http://localhost:${portNumber}` }]; req.swaggerDoc = copyOpenAiJson; next(); }, swaggerUi.serveFiles(openAiApi), swaggerUi.setup(), ); const server = http.createServer(app); let listeningOn = this.configurationRegistry.getExtensionConfiguration().apiPort; server.on('error', () => { this.displayApiError(listeningOn); }); if (listeningOn === PREFERENCE_RANDOM_PORT) { getFreeRandomPort(LISTENING_ADDRESS) .then((randomPort: number) => { listeningOn = randomPort; this.#listener = server.listen(listeningOn, LISTENING_ADDRESS); }) .catch((e: unknown) => { console.error('unable to get a free port for the api server', e); }); } else { this.#listener = server.listen(listeningOn, LISTENING_ADDRESS); } } displayApiError(port: number): void { const apiStatusBarItem = podmanDesktopApi.window.createStatusBarItem(); apiStatusBarItem.text = `AI Lab API listening error`; apiStatusBarItem.command = SHOW_API_ERROR_COMMAND; this.extensionContext.subscriptions.push( podmanDesktopApi.commands.registerCommand(SHOW_API_ERROR_COMMAND, async () => { const address = `http://localhost:${port}`; await podmanDesktopApi.window.showErrorMessage( `AI Lab API failed to listen 
on\n${address}\nYou can change the port in the Preferences then restart the extension.`, 'OK', ); }), apiStatusBarItem, ); apiStatusBarItem.show(); } private getFile(filepath: string): string { // when plugin is installed, the file is placed in the plugin directory (~/.local/share/containers/podman-desktop/plugins//) const prodFile = path.join(__dirname, filepath); if (existsSync(prodFile)) { return prodFile; } // return dev file return path.join(__dirname, '..', '..', filepath); } getSpecFile(): string { return this.getFile('../api/openapi.yaml'); } getPackageFile(): string { return this.getFile('../package.json'); } dispose(): void { this.#listener?.close(); } private doErr(res: Response, message: string, err: unknown): void { res.status(500).json({ message, errors: [err instanceof Error ? err.message : err], }); } getSpec(_req: Request, res: Response): void { try { const spec = this.getSpecFile(); readFile(spec, 'utf-8') .then(content => { res.status(200).type('application/yaml').send(content); }) .catch((err: unknown) => this.doErr(res, 'unable to get spec', err)); } catch (err: unknown) { this.doErr(res, 'unable to get spec', err); } } getVersion(_req: Request, res: Response): void { try { const pkg = this.getPackageFile(); readFile(pkg, 'utf-8') .then(content => { const json = JSON.parse(content); res.status(200).json({ version: `v${json.version}` }); }) .catch((err: unknown) => this.doErr(res, 'unable to get version', err)); } catch (err: unknown) { this.doErr(res, 'unable to get version', err); } } getModels(_req: Request, res: Response): void { try { const models = this.modelsManager .getModelsInfo() .filter(model => this.modelsManager.isModelOnDisk(model.id)) .map(model => asListModelResponse(model)); res.status(200).json({ models: models }); } catch (err: unknown) { this.doErr(res, 'unable to get models', err); } } private streamLine(res: Response, obj: unknown): void { res.write(JSON.stringify(obj) + '\n'); } private sendResult(res: Response, obj: 
unknown, code: number, stream: boolean): void { // eslint-disable-next-line sonarjs/no-selector-parameter if (stream) { this.streamLine(res, obj); } else { res.status(code).json(obj); } } pullModel(req: Request, res: Response): void { const modelName = req.body['model'] ?? req.body['name']; let stream: boolean = true; if ('stream' in req.body) { stream = req.body['stream']; } let modelInfo: ModelInfo; if (stream) { this.streamLine(res, { status: 'pulling manifest' }); } try { modelInfo = this.catalogManager.getModelByName(modelName); } catch { this.sendResult(res, { error: 'pull model manifest: file does not exist' }, 500, stream); res.end(); return; } if (this.modelsManager.isModelOnDisk(modelInfo.id)) { this.sendResult( res, { status: 'success', }, 200, stream, ); res.end(); return; } const abortController = new AbortController(); const downloader = this.modelsManager.createDownloader(modelInfo, abortController.signal); if (stream) { downloader.onEvent(event => { if (isProgressEvent(event) && event.id === modelName) { this.streamLine(res, { status: `pulling ${modelInfo.sha256}`, digest: `sha256:${modelInfo.sha256}`, total: event.total, completed: Math.round((event.total * event.value) / 100), }); } }, this); } downloader .perform(modelName) .then(async () => { await this.modelsManager.getLocalModelsFromDisk(); await this.modelsManager.sendModelsInfo(); this.sendResult( res, { status: 'success', }, 200, stream, ); }) .catch((err: unknown) => { this.sendResult( res, { error: String(err), }, 500, stream, ); }) .finally(() => { res.end(); }); } show(req: Request, res: Response): void { res.status(200).json({}); res.end(); } // makeServerAvailable checks if an inference server for the model exists and is started // if not, it creates and/or starts it, and wait for the service to be healthy private async makeServerAvailable(modelInfo: ModelInfo): Promise { let servers = this.inferenceManager.getServers(); let server = servers.find(s => s.models.map(mi => 
mi.id).includes(modelInfo.id)); if (!server) { const config = await withDefaultConfiguration({ modelsInfo: [modelInfo], }); await this.inferenceManager.createInferenceServer(config); } else if (server.status === 'stopped') { await this.inferenceManager.startInferenceServer(server.container.containerId); } else { return server; } servers = this.inferenceManager.getServers(); server = servers.find(s => s.models.map(mi => mi.id).includes(modelInfo.id)); if (!server) { throw new Error('unable to start inference server'); } // wait for the container to be healthy return new Promise(resolve => { const disposable = this.containerRegistry.onHealthyContainerEvent(event => { if (event.id !== server.container.containerId) { return; } disposable.dispose(); resolve(server); }); if (server.status === 'running' && server.health?.Status === 'healthy') { disposable.dispose(); resolve(server); } }); } // openAIChatCompletions executes a chat completion on an OpenAI compatible API private async openAIChatCompletions(options: ChatCompletionOptions): Promise { if (!options.modelInfo.file) { throw new Error('model info has undefined file.'); } const client = new OpenAI({ baseURL: `http://localhost:${options.server.connection.port}/v1`, apiKey: 'dummy', }); const createOptions = { messages: options.messages, model: options.modelInfo.name, }; // we call `create` with a fixed value of `stream`, to get the specific type of `response`, either Stream, or T if (options.stream) { const response = await client.chat.completions.create({ ...createOptions, stream: options.stream }); await options.onStreamResponse(response); } else { const response = await client.chat.completions.create({ ...createOptions, stream: options.stream }); options.onNonStreamResponse(response); } } // checkModelAvailability checks if a model is in the catalog // AND has been downloaded by the user private checkModelAvailability(modelName: string): ModelInfo { let modelInfo: ModelInfo; try { modelInfo = 
this.catalogManager.getModelByName(modelName); } catch { throw `chat: model "${modelName}" does not exist`; } if (!this.modelsManager.isModelOnDisk(modelInfo.id)) { throw `chat: model "${modelName}" not found, try pulling it first`; } return modelInfo; } // generate first starts the service if necessary // If a prompt is given, it runs a chat completion with a single message and returns the result generate(req: Request, res: Response): void { let stream: boolean = true; if ('stream' in req.body) { stream = req.body['stream']; } const prompt = req.body['prompt']; const modelName = req.body['model']; let modelInfo: ModelInfo; try { modelInfo = this.checkModelAvailability(modelName); } catch (error) { this.sendResult(res, { error }, 500, stream); res.end(); return; } // create/start inference server if necessary this.makeServerAvailable(modelInfo) .then(async (server: InferenceServer) => { if (!prompt) { this.sendResult( res, { model: modelName, response: '', done: true, done_reason: 'load', }, 200, stream, ); res.end(); return; } const messages = [ { content: prompt, role: 'user', name: undefined, } as ChatCompletionMessageParam, ]; await this.openAIChatCompletions({ server, modelInfo, messages, stream, onStreamResponse: async response => { for await (const chunk of response) { res.write( JSON.stringify({ model: modelName, response: chunk.choices[0].delta.content ?? '', done: chunk.choices[0].finish_reason === 'stop', done_reason: chunk.choices[0].finish_reason === 'stop' ? 'stop' : undefined, }) + '\n', ); } res.end(); }, onNonStreamResponse: response => { res.status(200).json({ model: modelName, response: response.choices[0].message.content ?? 
'', done: true, done_reason: 'stop', }); res.end(); }, }); }) .catch((err: unknown) => console.error(`unable to check if the inference server is running: ${err}`)); } // chat first starts the service if necessary // then runs a chat completion and returns the result chat(req: Request, res: Response): void { let stream: boolean = true; if ('stream' in req.body) { stream = req.body['stream']; } const messagesUser: Message[] = req.body['messages']; const modelName = req.body['model']; let modelInfo: ModelInfo; try { modelInfo = this.checkModelAvailability(modelName); } catch (error) { this.sendResult(res, { error }, 500, stream); res.end(); return; } // create/start inference server if necessary this.makeServerAvailable(modelInfo) .then(async (server: InferenceServer) => { const messages = messagesUser.map( message => ({ name: undefined, ...message, }) as ChatCompletionMessageParam, ); await this.openAIChatCompletions({ server, modelInfo, messages, stream, onStreamResponse: async response => { for await (const chunk of response) { res.write( JSON.stringify({ model: modelName, message: { role: 'assistant', content: chunk.choices[0].delta.content ?? '', }, done: chunk.choices[0].finish_reason === 'stop', done_reason: chunk.choices[0].finish_reason === 'stop' ? 'stop' : undefined, }) + '\n', ); } res.end(); }, onNonStreamResponse: response => { res.status(200).json({ model: modelName, message: { role: 'assistant', content: response.choices[0].message.content ?? 
'', }, done: true, done_reason: 'stop', }); res.end(); }, }); }) .catch((err: unknown) => console.error(`unable to check if the inference server is running: ${err}`)); } ps(_req: Request, res: Response): void { try { const models = this.inferenceManager .getServers() .filter(server => server.status === 'running') .flatMap(server => server.models) .map(model => asProcessModelResponse(model)); res.status(200).json({ models }); } catch (err: unknown) { this.doErr(res, 'unable to ps', err); } } } ================================================ FILE: packages/backend/src/managers/application/applicationManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import type { ContainerProviderConnection, PodInfo, TelemetryLogger } from '@podman-desktop/api'; import { containerEngine, window } from '@podman-desktop/api'; import type { PodmanConnection } from '../podmanConnection'; import type { CatalogManager } from '../catalogManager'; import type { ModelsManager } from '../modelsManager'; import type { PodManager } from '../recipes/PodManager'; import type { RecipeManager } from '../recipes/RecipeManager'; import { ApplicationManager } from './applicationManager'; import type { Recipe, RecipeImage } from '@shared/models/IRecipe'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { VMType } from '@shared/models/IPodman'; import { POD_LABEL_MODEL_ID, POD_LABEL_RECIPE_ID } from '../../utils/RecipeConstants'; import type { InferenceServer } from '@shared/models/IInference'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import type { LlamaStackManager } from '../llama-stack/llamaStackManager'; import type { ApplicationOptions } from '../../models/ApplicationOptions'; const taskRegistryMock = { createTask: vi.fn(), updateTask: vi.fn(), deleteByLabels: vi.fn(), } as unknown as TaskRegistry; const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension; const podmanConnectionMock = { onPodmanConnectionEvent: vi.fn(), } as unknown as PodmanConnection; const catalogManagerMock = {} as unknown as CatalogManager; const modelsManagerMock = { requestDownloadModel: vi.fn(), uploadModelToPodmanMachine: vi.fn(), } as unknown as ModelsManager; const telemetryMock = { logError: vi.fn(), logUsage: vi.fn(), } as unknown as TelemetryLogger; const podManager = { onStartPodEvent: vi.fn(), onRemovePodEvent: vi.fn(), getPodsWithLabels: vi.fn(), createPod: 
vi.fn(), getPod: vi.fn(), findPodByLabelsValues: vi.fn(), startPod: vi.fn(), stopPod: vi.fn(), removePod: vi.fn(), } as unknown as PodManager; const recipeManager = { cloneRecipe: vi.fn(), buildRecipe: vi.fn(), } as unknown as RecipeManager; const llamaStackManager = { getLlamaStackContainers: vi.fn(), } as unknown as LlamaStackManager; vi.mock('@podman-desktop/api', () => ({ window: { withProgress: vi.fn(), }, ProgressLocation: { TASK_WIDGET: 'task-widget', }, provider: { getContainerConnections: vi.fn(), }, containerEngine: { createContainer: vi.fn(), }, Disposable: { create: vi.fn(), }, })); const recipeMock: Recipe = { id: 'recipe-test', name: 'Test Recipe', categories: [], description: 'test recipe description', repository: 'http://test-repository.test', readme: 'test recipe readme', }; const remoteModelMock: ModelInfo = { id: 'model-test', name: 'Test Model', description: 'test model description', url: 'http://test-repository.test', }; const recipeImageInfoMock: RecipeImage = { name: 'test recipe image info', id: 'test-recipe-image-info', appName: 'test-app-name', engineId: 'test-engine-id', ports: [], modelService: false, recipeId: recipeMock.id, }; const connectionMock: ContainerProviderConnection = { name: 'Podman Machine', vmType: VMType.UNKNOWN, } as unknown as ContainerProviderConnection; beforeEach(() => { vi.resetAllMocks(); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); vi.mocked(recipeManager.buildRecipe).mockResolvedValue({ images: [recipeImageInfoMock] }); vi.mocked(podManager.createPod).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id' }); vi.mocked(podManager.getPod).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id' } as PodInfo); vi.mocked(podManager.getPodsWithLabels).mockResolvedValue([]); vi.mocked(taskRegistryMock.createTask).mockImplementation((name, state, labels) => ({ name, state, labels, id: 'fake-task', })); 
vi.mocked(modelsManagerMock.uploadModelToPodmanMachine).mockResolvedValue('downloaded-model-path'); vi.mocked(llamaStackManager.getLlamaStackContainers).mockResolvedValue({ server: { containerId: 'container1', port: 10001, state: 'running' }, playground: { containerId: 'playground1', port: 10002, state: 'running' }, }); }); function getInitializedApplicationManager(): ApplicationManager { const manager = new ApplicationManager( taskRegistryMock, rpcExtensionMock, podmanConnectionMock, catalogManagerMock, modelsManagerMock, telemetryMock, podManager, recipeManager, llamaStackManager, ); manager.init(); return manager; } describe('requestPullApplication', () => { test('task should be set to error if pull application raise an error', async () => { vi.mocked(window.withProgress).mockRejectedValue(new Error('pull application error')); const trackingId = await getInitializedApplicationManager().requestPullApplication({ connection: connectionMock, recipe: recipeMock, model: remoteModelMock, }); // ensure the task is created await vi.waitFor(() => { expect(taskRegistryMock.createTask).toHaveBeenCalledWith(`Pulling ${recipeMock.name} recipe`, 'loading', { trackingId: trackingId, 'recipe-pulling': recipeMock.id, }); }); // ensure the task is updated await vi.waitFor(() => { expect(taskRegistryMock.updateTask).toHaveBeenCalledWith( expect.objectContaining({ state: 'error', }), ); }); }); }); describe('stopApplication', () => { test('calling stop with exited pod should not create task', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, Status: 'Exited', } as unknown as PodInfo); await getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id); expect(taskRegistryMock.createTask).not.toHaveBeenCalled(); expect(podManager.stopPod).not.toHaveBeenCalled(); }); test('calling stop 
application with running pod should create stop task ', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, Status: 'Running', } as unknown as PodInfo); await getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id); expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Stopping AI App', 'loading', { 'recipe-id': recipeMock.id, 'model-id': remoteModelMock.id, }); expect(podManager.stopPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing'); }); test('error raised should make the task as failed', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, Status: 'Running', } as unknown as PodInfo); vi.mocked(podManager.stopPod).mockRejectedValue(new Error('stop pod error')); await expect(() => { return getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id); }).rejects.toThrowError('stop pod error'); expect(taskRegistryMock.updateTask).toHaveBeenCalledWith( expect.objectContaining({ state: 'error', }), ); }); }); describe('startApplication', () => { test('expect startPod in podManager to be properly called', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, Status: 'Exited', } as unknown as PodInfo); await getInitializedApplicationManager().startApplication(recipeMock.id, remoteModelMock.id); expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing'); }); test('error raised should make the task as failed', async () => { 
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, Status: 'Exited', } as unknown as PodInfo); vi.mocked(podManager.startPod).mockRejectedValue(new Error('start pod error')); await expect(() => { return getInitializedApplicationManager().startApplication(recipeMock.id, remoteModelMock.id); }).rejects.toThrowError('start pod error'); expect(taskRegistryMock.updateTask).toHaveBeenCalledWith( expect.objectContaining({ state: 'error', }), ); }); }); describe.each([true, false])('pullApplication, with model is %o', withModel => { let applicationOptions: ApplicationOptions; beforeEach(() => { applicationOptions = withModel ? { connection: connectionMock, recipe: recipeMock, model: remoteModelMock, } : { connection: connectionMock, recipe: recipeMock, dependencies: { llamaStack: true, }, }; }); test('labels should be propagated', async () => { await getInitializedApplicationManager().pullApplication(applicationOptions, { 'test-label': 'test-value', }); // clone the recipe expect(recipeManager.cloneRecipe).toHaveBeenCalledWith(recipeMock, { 'test-label': 'test-value', 'model-id': withModel ? remoteModelMock.id : '', }); if (withModel) { // download model expect(modelsManagerMock.requestDownloadModel).toHaveBeenCalledWith(remoteModelMock, { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': remoteModelMock.id, }); // upload model to podman machine expect(modelsManagerMock.uploadModelToPodmanMachine).toHaveBeenCalledWith(connectionMock, remoteModelMock, { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': remoteModelMock.id, }); } // build the recipe expect(recipeManager.buildRecipe).toHaveBeenCalledWith( { connection: connectionMock, recipe: recipeMock, model: withModel ? 
remoteModelMock : undefined, dependencies: applicationOptions.dependencies, }, { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': withModel ? remoteModelMock.id : '', }, ); // create AI App task must be created expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Creating AI App', 'loading', { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': withModel ? remoteModelMock.id : '', }); // a pod must have been created expect(podManager.createPod).toHaveBeenCalledWith({ provider: connectionMock, name: expect.any(String), portmappings: [], labels: { [POD_LABEL_MODEL_ID]: withModel ? remoteModelMock.id : '', [POD_LABEL_RECIPE_ID]: recipeMock.id, }, }); expect(containerEngine.createContainer).toHaveBeenCalledWith('test-engine-id', { Image: recipeImageInfoMock.id, name: expect.any(String), Env: withModel ? [] : ['MODEL_ENDPOINT=http://host.containers.internal:10001'], HealthCheck: undefined, HostConfig: undefined, Detach: true, pod: 'test-pod-id', start: false, }); // finally the pod must be started expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id'); }); test('requestDownloadModel skipped with inference server', async () => { vi.mocked(recipeManager.buildRecipe).mockResolvedValue({ images: [recipeImageInfoMock], inferenceServer: { connection: { port: 56001, }, } as InferenceServer, }); vi.mocked(modelsManagerMock.requestDownloadModel).mockResolvedValue('/path/to/model'); await getInitializedApplicationManager().pullApplication(applicationOptions, { 'test-label': 'test-value', }); // clone the recipe expect(recipeManager.cloneRecipe).toHaveBeenCalledWith(recipeMock, { 'test-label': 'test-value', 'model-id': withModel ? 
remoteModelMock.id : '', }); if (withModel) { // download model expect(modelsManagerMock.requestDownloadModel).toHaveBeenCalledWith(remoteModelMock, { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': remoteModelMock.id, }); // upload model to podman machine expect(modelsManagerMock.uploadModelToPodmanMachine).not.toHaveBeenCalled(); } // build the recipe expect(recipeManager.buildRecipe).toHaveBeenCalledWith( { connection: connectionMock, recipe: recipeMock, model: withModel ? remoteModelMock : undefined, dependencies: applicationOptions.dependencies, }, { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': withModel ? remoteModelMock.id : '', }, ); // create AI App task must be created expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Creating AI App', 'loading', { 'test-label': 'test-value', 'recipe-id': recipeMock.id, 'model-id': withModel ? remoteModelMock.id : '', }); // a pod must have been created expect(podManager.createPod).toHaveBeenCalledWith({ provider: connectionMock, name: expect.any(String), portmappings: [], labels: { [POD_LABEL_MODEL_ID]: withModel ? remoteModelMock.id : '', [POD_LABEL_RECIPE_ID]: recipeMock.id, }, }); expect(containerEngine.createContainer).toHaveBeenCalledWith('test-engine-id', { Image: recipeImageInfoMock.id, name: expect.any(String), Env: withModel ? 
['MODEL_ENDPOINT=http://host.containers.internal:56001'] : ['MODEL_ENDPOINT=http://host.containers.internal:10001'], HealthCheck: undefined, HostConfig: undefined, Detach: true, pod: 'test-pod-id', start: false, }); // finally the pod must be started expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id'); }); test('existing application should be removed', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id-existing', Labels: { [POD_LABEL_MODEL_ID]: remoteModelMock.id, [POD_LABEL_RECIPE_ID]: recipeMock.id, }, } as unknown as PodInfo); await getInitializedApplicationManager().pullApplication(applicationOptions); // removing existing application should create a task to notify the user expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Removing AI App', 'loading', { 'recipe-id': recipeMock.id, 'model-id': withModel ? remoteModelMock.id : '', }); // the remove pod should have been called expect(podManager.removePod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing'); }); test('qemu connection should have specific flag', async () => { vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue(undefined); vi.mocked(recipeManager.buildRecipe).mockResolvedValue({ images: [ recipeImageInfoMock, { modelService: true, ports: ['8888'], name: 'llamacpp', id: 'llamacpp', appName: 'llamacpp', engineId: recipeImageInfoMock.engineId, recipeId: recipeMock.id, }, ], }); await getInitializedApplicationManager().pullApplication(applicationOptions); // the remove pod should have been called expect(containerEngine.createContainer).toHaveBeenCalledWith( recipeImageInfoMock.engineId, expect.objectContaining({ HostConfig: withModel ? 
{ Mounts: [ { Mode: 'Z', Source: 'downloaded-model-path', Target: '/downloaded-model-path', Type: 'bind', }, ], } : undefined, }), ); }); }); ================================================ FILE: packages/backend/src/managers/application/applicationManager.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RecipeComponents, RecipeImage } from '@shared/models/IRecipe'; import * as path from 'node:path'; import { containerEngine, Disposable, window, ProgressLocation } from '@podman-desktop/api'; import type { PodCreatePortOptions, TelemetryLogger, PodInfo, HostConfig, HealthConfig, PodContainerInfo, ContainerProviderConnection, } from '@podman-desktop/api'; import type { ModelsManager } from '../modelsManager'; import { getPortsFromLabel, getPortsInfo } from '../../utils/ports'; import { getDurationSecondsSince, timeout } from '../../utils/utils'; import type { ApplicationState } from '@shared/models/IApplicationState'; import type { PodmanConnection } from '../podmanConnection'; import { MSG_APPLICATIONS_STATE_UPDATE } from '@shared/Messages'; import type { CatalogManager } from '../catalogManager'; import { ApplicationRegistry } from '../../registries/ApplicationRegistry'; import type { TaskRegistry } 
from '../../registries/TaskRegistry'; import { Publisher } from '../../utils/Publisher'; import { getModelPropertiesForEnvironment } from '../../utils/modelsUtils'; import { getRandomName, getRandomString } from '../../utils/randomUtils'; import type { PodManager } from '../recipes/PodManager'; import { SECOND } from '../../workers/provider/LlamaCppPython'; import type { RecipeManager } from '../recipes/RecipeManager'; import { POD_LABEL_APP_PORTS, POD_LABEL_MODEL_ID, POD_LABEL_MODEL_PORTS, POD_LABEL_RECIPE_ID, } from '../../utils/RecipeConstants'; import { VMType } from '@shared/models/IPodman'; import { RECIPE_START_ROUTE } from '../../registries/NavigationRegistry'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { TaskRunner } from '../TaskRunner'; import { getInferenceType } from '../../utils/inferenceUtils'; import type { LlamaStackManager } from '../llama-stack/llamaStackManager'; import { isApplicationOptionsWithModelInference, type ApplicationOptions } from '../../models/ApplicationOptions'; export class ApplicationManager extends Publisher implements Disposable { #applications: ApplicationRegistry; protectTasks: Set = new Set(); #disposables: Disposable[]; #taskRunner: TaskRunner; constructor( private taskRegistry: TaskRegistry, rpcExtension: RpcExtension, private podmanConnection: PodmanConnection, private catalogManager: CatalogManager, private modelsManager: ModelsManager, private telemetry: TelemetryLogger, private podManager: PodManager, private recipeManager: RecipeManager, private llamaStackManager: LlamaStackManager, ) { super(rpcExtension, MSG_APPLICATIONS_STATE_UPDATE, () => this.getApplicationsState()); this.#applications = new ApplicationRegistry(); this.#taskRunner = new TaskRunner(this.taskRegistry); this.#disposables = []; } async requestPullApplication(options: ApplicationOptions): Promise { // create a tracking id to put in the labels const trackingId: string = getRandomString(); const labels: Record = { 
trackingId: trackingId, }; this.#taskRunner .runAsTask( { ...labels, 'recipe-pulling': options.recipe.id, // this label should only be on the master task }, { loadingLabel: `Pulling ${options.recipe.name} recipe`, errorMsg: err => `Something went wrong while pulling ${options.recipe.name}: ${String(err)}`, }, () => window.withProgress( { location: ProgressLocation.TASK_WIDGET, title: `Pulling ${options.recipe.name}.`, details: { routeId: RECIPE_START_ROUTE, routeArgs: [options.recipe.id, trackingId], }, }, () => this.pullApplication(options, labels), ), ) .catch(() => {}); return trackingId; } async pullApplication(options: ApplicationOptions, labels: Record = {}): Promise { let modelId: string; if (isApplicationOptionsWithModelInference(options)) { modelId = options.model.id; } else { modelId = ''; } // clear any existing status / tasks related to the pair recipeId-modelId. this.taskRegistry.deleteByLabels({ 'recipe-id': options.recipe.id, 'model-id': modelId, }); const startTime = performance.now(); try { // init application (git clone, models download etc.) 
const podInfo: PodInfo = await this.initApplication(options, labels); // start the pod await this.runApplication(podInfo, { ...labels, 'recipe-id': options.recipe.id, 'model-id': modelId, }); // measure init + start time const durationSeconds = getDurationSecondsSince(startTime); this.telemetry.logUsage('recipe.pull', { 'recipe.id': options.recipe.id, 'recipe.name': options.recipe.name, durationSeconds, }); } catch (err: unknown) { const durationSeconds = getDurationSecondsSince(startTime); this.telemetry.logError('recipe.pull', { 'recipe.id': options.recipe.id, 'recipe.name': options.recipe.name, durationSeconds, message: 'error pulling application', error: err, }); throw err; } } /** * This method will execute the following tasks * - git clone * - git checkout * - register local repository * - download models * - upload models * - build containers * - create pod * * @param connection * @param recipe * @param model * @param labels * @private */ private async initApplication(options: ApplicationOptions, labels: Record = {}): Promise { let modelId: string; if (isApplicationOptionsWithModelInference(options)) { modelId = options.model.id; } else { modelId = ''; } // clone the recipe await this.recipeManager.cloneRecipe(options.recipe, { ...labels, 'model-id': modelId }); let modelPath: string | undefined; if (isApplicationOptionsWithModelInference(options)) { // get model by downloading it or retrieving locally modelPath = await this.modelsManager.requestDownloadModel(options.model, { ...labels, 'recipe-id': options.recipe.id, 'model-id': modelId, }); } // build all images, one per container (for a basic sample we should have 2 containers = sample app + model service) const recipeComponents = await this.recipeManager.buildRecipe(options, { ...labels, 'recipe-id': options.recipe.id, 'model-id': modelId, }); if (isApplicationOptionsWithModelInference(options)) { // upload model to podman machine if user system is supported if (!recipeComponents.inferenceServer) { 
modelPath = await this.modelsManager.uploadModelToPodmanMachine(options.connection, options.model, { ...labels, 'recipe-id': options.recipe.id, 'model-id': modelId, }); } } // first delete any existing pod with matching labels if (await this.hasApplicationPod(options.recipe.id, modelId)) { await this.removeApplication(options.recipe.id, modelId); } // create a pod containing all the containers to run the application return this.createApplicationPod(options, recipeComponents, modelPath, { ...labels, 'recipe-id': options.recipe.id, 'model-id': modelId, }); } /** * Given an ApplicationPodInfo, start the corresponding pod * @param podInfo * @param labels */ protected async runApplication(podInfo: PodInfo, labels?: { [key: string]: string }): Promise { await this.#taskRunner.runAsTask( labels ?? {}, { loadingLabel: 'Starting AI App', successLabel: 'AI App is running', errorMsg: err => String(err), }, async () => { await this.podManager.startPod(podInfo.engineId, podInfo.Id); // check if all containers have started successfully for (const container of podInfo.Containers ?? []) { await this.waitContainerIsRunning(podInfo.engineId, container); } }, ); return this.checkPodsHealth(); } protected async waitContainerIsRunning(engineId: string, container: PodContainerInfo): Promise { const TIME_FRAME_MS = 5000; const MAX_ATTEMPTS = 60 * (60000 / TIME_FRAME_MS); // try for 1 hour for (let i = 0; i < MAX_ATTEMPTS; i++) { const sampleAppContainerInspectInfo = await containerEngine.inspectContainer(engineId, container.Id); if (sampleAppContainerInspectInfo.State.Running) { return; } await timeout(TIME_FRAME_MS); } throw new Error(`Container ${container.Id} not started in time`); } protected async createApplicationPod( options: ApplicationOptions, components: RecipeComponents, modelPath: string | undefined, labels?: { [key: string]: string }, ): Promise { return this.#taskRunner.runAsTask( labels ?? 
{}, { loadingLabel: 'Creating AI App', errorMsg: err => `Something went wrong while creating pod: ${String(err)}`, }, async ({ updateLabels }): Promise => { const podInfo = await this.createPod(options, components.images); updateLabels(labels => ({ ...labels, 'pod-id': podInfo.Id, })); await this.createContainerAndAttachToPod(options, podInfo, components, modelPath, labels); return podInfo; }, ); } protected async createContainerAndAttachToPod( options: ApplicationOptions, podInfo: PodInfo, components: RecipeComponents, modelPath: string | undefined, labels?: { [key: string]: string }, ): Promise { const vmType = options.connection.vmType ?? VMType.UNKNOWN; // temporary check to set Z flag or not - to be removed when switching to podman 5 await Promise.all( components.images.map(async image => { let hostConfig: HostConfig | undefined = undefined; let envs: string[] = []; let healthcheck: HealthConfig | undefined = undefined; // if it's a model service we mount the model as a volume if (modelPath && isApplicationOptionsWithModelInference(options)) { if (image.modelService) { const modelName = path.basename(modelPath); hostConfig = { Mounts: [ { Target: `/${modelName}`, Source: modelPath, Type: 'bind', Mode: vmType === VMType.QEMU ? 
undefined : 'Z', }, ], }; envs = [`MODEL_PATH=/${modelName}`]; envs.push(...getModelPropertiesForEnvironment(options.model)); } else if (components.inferenceServer) { const endPoint = `http://host.containers.internal:${components.inferenceServer.connection.port}`; envs = [`MODEL_ENDPOINT=${endPoint}`]; } else { const modelService = components.images.find(image => image.modelService); if (modelService && modelService.ports.length > 0) { const endPoint = `http://localhost:${modelService.ports[0]}`; envs = [`MODEL_ENDPOINT=${endPoint}`]; } } } else if (options.dependencies?.llamaStack) { let stack = await this.llamaStackManager.getLlamaStackContainers(); if (!stack) { await this.llamaStackManager.createLlamaStackContainers(options.connection, labels ?? {}); stack = await this.llamaStackManager.getLlamaStackContainers(); } if (stack) { envs = [`MODEL_ENDPOINT=http://host.containers.internal:${stack.server?.port}`]; } } if (image.ports.length > 0) { healthcheck = { // must be the port INSIDE the container not the exposed one Test: ['CMD-SHELL', `curl -s localhost:${image.ports[0]} > /dev/null`], Interval: SECOND * 5, Retries: 4 * 5, Timeout: SECOND * 2, }; } const podifiedName = getRandomName(`${image.appName}-podified`); await containerEngine.createContainer(podInfo.engineId, { Image: image.id, name: podifiedName, Detach: true, HostConfig: hostConfig, Env: envs, start: false, pod: podInfo.Id, HealthCheck: healthcheck, }); }), ); } protected async createPod(options: ApplicationOptions, images: RecipeImage[]): Promise { // find the exposed port of the sample app so we can open its ports on the new pod const sampleAppImageInfo = images.find(image => !image.modelService); if (!sampleAppImageInfo) { console.error('no sample app image found'); throw new Error('no sample app found'); } const portmappings: PodCreatePortOptions[] = []; // we expose all ports so we can check the model service if it is actually running for (const image of images) { for (const exposed of 
image.ports) { const localPorts = await getPortsInfo(exposed); if (localPorts) { portmappings.push({ container_port: parseInt(exposed), host_port: parseInt(localPorts), host_ip: '', protocol: '', range: 1, }); } } } // create new pod const labels: Record = { [POD_LABEL_RECIPE_ID]: options.recipe.id, }; if (isApplicationOptionsWithModelInference(options)) { labels[POD_LABEL_MODEL_ID] = options.model.id; } else { labels[POD_LABEL_MODEL_ID] = ''; } // collecting all modelService ports const modelPorts = images .filter(img => img.modelService) .flatMap(img => img.ports) .map(port => portmappings.find(pm => `${pm.container_port}` === port)?.host_port); if (modelPorts.length) { labels[POD_LABEL_MODEL_PORTS] = modelPorts.join(','); } // collecting all application ports (excluding service ports) const appPorts = images .filter(img => !img.modelService) .flatMap(img => img.ports) .map(port => portmappings.find(pm => `${pm.container_port}` === port)?.host_port); if (appPorts.length) { labels[POD_LABEL_APP_PORTS] = appPorts.join(','); } const { engineId, Id } = await this.podManager.createPod({ provider: options.connection, name: getRandomName(`pod-${sampleAppImageInfo.appName}`), portmappings: portmappings, labels, }); return this.podManager.getPod(engineId, Id); } /** * Stop the pod with matching recipeId and modelId * @param recipeId * @param modelId */ async stopApplication(recipeId: string, modelId: string): Promise { // clear existing tasks this.clearTasks(recipeId, modelId); // get the application pod const appPod = await this.getApplicationPod(recipeId, modelId); // if the pod is already stopped skip if (appPod.Status !== 'Exited') { await this.#taskRunner.runAsTask( { 'recipe-id': recipeId, 'model-id': modelId, }, { loadingLabel: 'Stopping AI App', successLabel: 'AI App Stopped', errorLabel: 'Error stopping AI App', errorMsg: err => `Error removing the pod.: ${String(err)}`, }, () => this.podManager.stopPod(appPod.engineId, appPod.Id), ); await 
this.checkPodsHealth(); } return appPod; } /** * Utility method to start a pod using (recipeId, modelId) * @param recipeId * @param modelId */ async startApplication(recipeId: string, modelId: string): Promise { this.clearTasks(recipeId, modelId); const pod = await this.getApplicationPod(recipeId, modelId); return this.runApplication(pod, { 'recipe-id': recipeId, 'model-id': modelId, }); } protected refresh(): void { // clear existing applications this.#applications.clear(); // collect all pods based on label this.podManager .getPodsWithLabels([POD_LABEL_RECIPE_ID]) .then(pods => { pods.forEach(pod => this.adoptPod(pod)); }) .catch((err: unknown) => { console.error('error during adoption of existing playground containers', err); }); // notify this.notify(); } init(): void { this.podmanConnection.onPodmanConnectionEvent(() => { this.refresh(); }); this.podManager.onStartPodEvent((pod: PodInfo) => { this.adoptPod(pod); }); this.podManager.onRemovePodEvent(({ podId }) => { this.forgetPodById(podId); }); const ticker = (): void => { this.checkPodsHealth() .catch((err: unknown) => { console.error('error getting pods statuses', err); }) .finally(() => (timerId = setTimeout(ticker, 10000))); }; // using a recursive setTimeout instead of setInterval as we don't know how long the operation takes let timerId = setTimeout(ticker, 1000); this.#disposables.push( Disposable.create(() => { clearTimeout(timerId); }), ); // refresh on init this.refresh(); } protected adoptPod(pod: PodInfo): void { if (!pod.Labels) { return; } const recipeId = pod.Labels[POD_LABEL_RECIPE_ID]; const modelId = pod.Labels[POD_LABEL_MODEL_ID]; if (!recipeId || !modelId) { return; } const appPorts = getPortsFromLabel(pod.Labels, POD_LABEL_APP_PORTS); const modelPorts = getPortsFromLabel(pod.Labels, POD_LABEL_MODEL_PORTS); if (this.#applications.has({ recipeId, modelId })) { return; } const state: ApplicationState = { recipeId, modelId, pod, appPorts, modelPorts, health: 'starting', backend: 
getInferenceType(this.modelsManager.getModelsInfo().filter(m => m.id === modelId)), }; this.updateApplicationState(recipeId, modelId, state); } protected forgetPodById(podId: string): void { const app = Array.from(this.#applications.values()).find(p => p.pod.Id === podId); if (!app) { return; } if (!app.pod.Labels) { return; } const recipeId = app.pod.Labels[POD_LABEL_RECIPE_ID]; const modelId = app.pod.Labels[POD_LABEL_MODEL_ID]; if (!recipeId || !modelId) { return; } if (!this.#applications.has({ recipeId, modelId })) { return; } this.#applications.delete({ recipeId, modelId }); this.notify(); const protect = this.protectTasks.has(podId); if (!protect) { this.taskRegistry.createTask('AI App stopped manually', 'success', { 'recipe-id': recipeId, 'model-id': modelId, }); } else { this.protectTasks.delete(podId); } } protected async checkPodsHealth(): Promise { const pods = await this.podManager.getPodsWithLabels([POD_LABEL_RECIPE_ID, POD_LABEL_MODEL_ID]); let changes = false; for (const pod of pods) { const recipeId = pod.Labels[POD_LABEL_RECIPE_ID]; const modelId = pod.Labels[POD_LABEL_MODEL_ID]; if (!this.#applications.has({ recipeId, modelId })) { // a fresh pod could not have been added yet, we will handle it at next iteration continue; } const podHealth = await this.podManager.getHealth(pod); const state = this.#applications.get({ recipeId, modelId }); if (state.health !== podHealth) { state.health = podHealth; state.pod = pod; this.#applications.set({ recipeId, modelId }, state); changes = true; } if (pod.Status !== state.pod.Status) { state.pod = pod; changes = true; } } if (changes) { this.notify(); } } protected updateApplicationState(recipeId: string, modelId: string, state: ApplicationState): void { this.#applications.set({ recipeId, modelId }, state); this.notify(); } getApplicationsState(): ApplicationState[] { return Array.from(this.#applications.values()); } protected clearTasks(recipeId: string, modelId: string): void { // clear any existing status 
/ tasks related to the pair recipeId-modelId. this.taskRegistry.deleteByLabels({ 'recipe-id': recipeId, 'model-id': modelId, }); } /** * Method that will stop then remove a pod corresponding to the recipe and model provided * @param recipeId * @param modelId */ async removeApplication(recipeId: string, modelId: string): Promise { const appPod = await this.stopApplication(recipeId, modelId); this.protectTasks.add(appPod.Id); await this.#taskRunner.runAsTask( { 'recipe-id': recipeId, 'model-id': modelId, }, { loadingLabel: 'Removing AI App', successLabel: 'AI App Removed', errorLabel: 'Error stopping AI App', errorMsg: () => 'error removing the pod. Please try to remove the pod manually', }, () => this.podManager.removePod(appPod.engineId, appPod.Id), ); } async restartApplication(connection: ContainerProviderConnection, recipeId: string, modelId: string): Promise { const appPod = await this.getApplicationPod(recipeId, modelId); await this.removeApplication(recipeId, modelId); const recipe = this.catalogManager.getRecipeById(recipeId); let opts: ApplicationOptions; if (appPod.Labels[POD_LABEL_MODEL_ID] === '') { opts = { connection, recipe, }; } else { const model = this.catalogManager.getModelById(appPod.Labels[POD_LABEL_MODEL_ID]); opts = { connection, recipe, model, }; } // init the recipe const podInfo = await this.initApplication(opts); // start the pod return this.runApplication(podInfo, { 'recipe-id': recipeId, 'model-id': modelId, }); } async getApplicationPorts(recipeId: string, modelId: string): Promise { const state = this.#applications.get({ recipeId, modelId }); if (state) { return state.appPorts; } throw new Error(`Recipe ${recipeId} has no ports available`); } protected async getApplicationPod(recipeId: string, modelId: string): Promise { const appPod = await this.findPod(recipeId, modelId); if (!appPod) { throw new Error(`no pod found with recipe Id ${recipeId} and model Id ${modelId}`); } return appPod; } protected async hasApplicationPod(recipeId: 
string, modelId: string): Promise { const pod = await this.podManager.findPodByLabelsValues({ LABEL_RECIPE_ID: recipeId, LABEL_MODEL_ID: modelId, }); return !!pod; } protected async findPod(recipeId: string, modelId: string): Promise { return this.podManager.findPodByLabelsValues({ [POD_LABEL_RECIPE_ID]: recipeId, [POD_LABEL_MODEL_ID]: modelId, }); } dispose(): void { this.#disposables.forEach(disposable => disposable.dispose()); } } ================================================ FILE: packages/backend/src/managers/catalogManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ /* eslint-disable @typescript-eslint/no-explicit-any */ import { afterEach, beforeEach, describe, expect, test, vi } from 'vitest'; import content from '../tests/ai-test.json'; import userContent from '../tests/ai-user-test.json'; import { EventEmitter, window } from '@podman-desktop/api'; import { CatalogManager } from './catalogManager'; import type { Stats } from 'node:fs'; import { promises, existsSync } from 'node:fs'; import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog'; import path from 'node:path'; import { version } from '../assets/ai.json'; import * as catalogUtils from '../utils/catalogUtils'; import type { RpcExtension } from '@shared/messages/MessageProxy'; vi.mock('../assets/ai.json', async importOriginal => { // eslint-disable-next-line @typescript-eslint/consistent-type-imports const { version } = await importOriginal(); return { default: { ...content, version: version }, version: version, }; }); vi.mock('node:fs'); vi.mock('node:fs/promises'); vi.mock('node:path'); vi.mock('@podman-desktop/api', async () => { return { EventEmitter: vi.fn(), window: { showNotification: vi.fn(), }, ProgressLocation: { TASK_WIDGET: 'TASK_WIDGET', }, fs: { createFileSystemWatcher: (): unknown => ({ onDidCreate: vi.fn(), onDidDelete: vi.fn(), onDidChange: vi.fn(), }), }, }; }); let catalogManager: CatalogManager; beforeEach(async () => { vi.resetAllMocks(); // mock EventEmitter logic for all tests vi.mocked(EventEmitter).mockImplementation(() => { const listeners: ((value: unknown) => void)[] = []; return { event: vi.fn().mockImplementation(callback => { listeners.push(callback); }), fire: vi.fn().mockImplementation((content: unknown) => { listeners.forEach(listener => listener(content)); }), } as unknown as EventEmitter; }); const appUserDirectory = '.'; // Creating CatalogManager catalogManager = new CatalogManager( { fire: 
vi.fn().mockResolvedValue(true), } as unknown as RpcExtension, appUserDirectory, ); }); describe('invalid user catalog', () => { beforeEach(async () => { vi.mocked(promises.readFile).mockResolvedValue('invalid json'); await catalogManager.init(); }); test('expect correct model is returned with valid id', () => { const model = catalogManager.getModelById('llama-2-7b-chat.Q5_K_S'); expect(model).toBeDefined(); expect(model.name).toEqual('Llama-2-7B-Chat-GGUF'); expect(model.registry).toEqual('Hugging Face'); expect(model.url).toEqual( 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf', ); }); test('expect error if id does not correspond to any model', () => { expect(() => catalogManager.getModelById('unknown')).toThrowError('No model found having id unknown'); }); }); test('expect correct model is returned from default catalog with valid id when no user catalog exists', async () => { vi.mocked(existsSync).mockReturnValue(false); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getRecipes().length > 0); const model = catalogManager.getModelById('llama-2-7b-chat.Q5_K_S'); expect(model).toBeDefined(); expect(model.name).toEqual('Llama-2-7B-Chat-GGUF'); expect(model.registry).toEqual('Hugging Face'); expect(model.url).toEqual( 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf', ); }); test('expect correct model is returned with valid id when the user catalog is valid', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const model = catalogManager.getModelById('model1'); expect(model).toBeDefined(); expect(model.name).toEqual('Model 1'); expect(model.registry).toEqual('Hugging Face'); expect(model.url).toEqual('https://model1.example.com'); }); test('expect to 
call writeFile in addLocalModelsToCatalog with catalog updated', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getRecipes().length > 0); const mtimeDate = new Date('2024-04-03T09:51:15.766Z'); vi.mocked(promises.stat).mockResolvedValue({ size: 1, mtime: mtimeDate, } as Stats); vi.mocked(path.resolve).mockReturnValue('path'); vi.mocked(promises.writeFile).mockResolvedValue(); await catalogManager.importUserModels([ { name: 'custom-model', path: '/root/path/file.gguf', }, ]); expect(promises.mkdir).toHaveBeenCalled(); expect(promises.writeFile).toBeCalledWith('path', expect.any(String), 'utf-8'); }); test('expect to call writeFile in removeLocalModelFromCatalog with catalog updated', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); vi.mocked(path.resolve).mockReturnValue('path'); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getRecipes().length > 0); vi.mocked(promises.writeFile).mockResolvedValue(); const updatedCatalog: ApplicationCatalog = { ...userContent }; updatedCatalog.models = updatedCatalog.models.filter(m => m.id !== 'model1'); await catalogManager.removeUserModel('model1'); expect(promises.writeFile).toBeCalledWith( 'path', expect.stringContaining(`"version": "${catalogUtils.CatalogFormat.CURRENT}"`), 'utf-8', ); }); test('catalog should be the combination of user catalog and default catalog', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); vi.mocked(path.resolve).mockReturnValue('path'); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().length > userContent.models.length); const mtimeDate = new Date('2024-04-03T09:51:15.766Z'); vi.mocked(promises.stat).mockResolvedValue({ 
size: 1, mtime: mtimeDate, } as Stats); vi.mocked(path.resolve).mockReturnValue('path'); const catalog = catalogManager.getCatalog(); expect(catalog).toEqual({ version: catalogUtils.CatalogFormat.CURRENT, recipes: [...content.recipes, ...userContent.recipes], models: [...content.models, ...userContent.models], categories: [...content.categories, ...userContent.categories], }); }); test('catalog should use user items in favour of default', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(path.resolve).mockReturnValue('path'); const overwriteFullCatalog: ApplicationCatalog = { version: catalogUtils.CatalogFormat.CURRENT, recipes: content.recipes.map(recipe => ({ ...recipe, name: 'user-recipe-overwrite', })), models: content.models.map(model => ({ ...model, name: 'user-model-overwrite', })), categories: content.categories.map(category => ({ ...category, name: 'user-model-overwrite', })), }; vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(overwriteFullCatalog)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().length > 0); const mtimeDate = new Date('2024-04-03T09:51:15.766Z'); vi.mocked(promises.stat).mockResolvedValue({ size: 1, mtime: mtimeDate, } as Stats); vi.mocked(path.resolve).mockReturnValue('path'); const catalog = catalogManager.getCatalog(); expect(catalog).toEqual(overwriteFullCatalog); }); test('default catalog should have latest version', () => { expect(version).toBe(catalogUtils.CatalogFormat.CURRENT); }); test('wrong catalog version should create a notification', () => { catalogManager['onUserCatalogUpdate']({ version: catalogUtils.CatalogFormat.UNKNOWN }); expect(window.showNotification).toHaveBeenCalledWith( expect.objectContaining({ title: 'Incompatible user-catalog', }), ); }); test('malformed catalog should create a notification', async () => { vi.mocked(existsSync).mockReturnValue(false); vi.mocked(path.resolve).mockReturnValue('path'); catalogManager['onUserCatalogUpdate']({ 
version: catalogUtils.CatalogFormat.CURRENT, models: [ { fakeProperty: 'hello', }, ], recipes: [], categories: [], }); expect(window.showNotification).toHaveBeenCalledWith( expect.objectContaining({ title: 'Error loading the user catalog', body: 'Something went wrong while trying to load the user catalog: Error: invalid model format', }), ); }); describe('spy on catalogUtils.sanitize', () => { beforeEach(() => { // do not mock the complete catalogUtils module but only spy the `sanitize` function, // as we want to keep the original `catalogUtils.hasCatalogWrongFormat` function vi.spyOn(catalogUtils, 'sanitize'); }); afterEach(() => { vi.mocked(catalogUtils.sanitize).mockRestore(); }); test('catalog with undefined version should call sanitize function to try converting it', () => { vi.mocked(promises.writeFile).mockResolvedValue(); catalogManager['onUserCatalogUpdate']({ recipes: [ { id: 'chatbot', description: 'This is a Streamlit chat demo application.', name: 'ChatBot', repository: 'https://github.com/containers/ai-lab-recipes', ref: 'v1.1.3', icon: 'natural-language-processing', categories: ['natural-language-processing'], basedir: 'recipes/natural_language_processing/chatbot', readme: '', models: ['hf.instructlab.granite-7b-lab-GGUF', 'hf.instructlab.merlinite-7b-lab-GGUF'], }, ], models: [], }); expect(catalogUtils.sanitize).toHaveBeenCalled(); expect(promises.writeFile).toHaveBeenCalled(); }); }); test('filter recipes by language', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const result1 = catalogManager.filterRecipes({ languages: ['lang1'], }); expect(result1.result.map(r => r.id)).toEqual(['recipe1']); expect(result1.choices).toEqual({ languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 2 }, { name: 'lang11', count: 1 }, { name: 
'lang2', count: 1 }, { name: 'lang3', count: 1 }, ], frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 1 }, ], tools: [{ name: 'tool1', count: 1 }], }); const result2 = catalogManager.filterRecipes({ languages: ['lang2'], }); expect(result2.result.map(r => r.id)).toEqual(['recipe2']); expect(result2.choices).toEqual({ languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 2 }, { name: 'lang11', count: 1 }, { name: 'lang2', count: 1 }, { name: 'lang3', count: 1 }, ], frameworks: [ { name: 'fw10', count: 1 }, { name: 'fw2', count: 1 }, ], tools: [{ name: 'tool2', count: 1 }], }); }); test('filter recipes by tool', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const result1 = catalogManager.filterRecipes({ tools: ['tool1'], }); expect(result1.result.map(r => r.id)).toEqual(['recipe1']); expect(result1.choices).toEqual({ frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 1 }, ], languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 1 }, ], tools: [ { name: 'tool1', count: 1 }, { name: 'tool2', count: 1 }, { name: 'tool3', count: 1 }, ], }); const result2 = catalogManager.filterRecipes({ tools: ['tool2'], }); expect(result2.result.map(r => r.id)).toEqual(['recipe2']); expect(result2.choices).toEqual({ frameworks: [ { name: 'fw10', count: 1 }, { name: 'fw2', count: 1 }, ], languages: [ { name: 'lang10', count: 1 }, { name: 'lang2', count: 1 }, ], tools: [ { name: 'tool1', count: 1 }, { name: 'tool2', count: 1 }, { name: 'tool3', count: 1 }, ], }); const result3 = catalogManager.filterRecipes({ tools: ['tool1', 'tool2'], }); expect(result3.result.map(r => r.id)).toEqual(['recipe1', 'recipe2']); expect(result3.choices).toEqual({ frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 2 }, { name: 'fw2', 
count: 1 }, ], languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 2 }, { name: 'lang2', count: 1 }, ], tools: [ { name: 'tool1', count: 1 }, { name: 'tool2', count: 1 }, { name: 'tool3', count: 1 }, ], }); }); test('filter recipes by framework', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const result1 = catalogManager.filterRecipes({ frameworks: ['fw1'], }); expect(result1.result.map(r => r.id)).toEqual(['recipe1']); expect(result1.choices).toEqual({ languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 1 }, ], frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 3 }, { name: 'fw11', count: 1 }, { name: 'fw2', count: 2 }, ], tools: [{ name: 'tool1', count: 1 }], }); const result2 = catalogManager.filterRecipes({ frameworks: ['fw2'], }); expect(result2.result.map(r => r.id)).toEqual(['recipe2', 'recipe3']); expect(result2.choices).toEqual({ languages: [ { name: 'lang10', count: 1 }, { name: 'lang11', count: 1 }, { name: 'lang2', count: 1 }, { name: 'lang3', count: 1 }, ], frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 3 }, { name: 'fw11', count: 1 }, { name: 'fw2', count: 2 }, ], tools: [ { name: 'tool2', count: 1 }, { name: 'tool3', count: 1 }, ], }); const result3 = catalogManager.filterRecipes({ frameworks: ['fw1', 'fw2'], }); expect(result3.result.map(r => r.id)).toEqual(['recipe1', 'recipe2', 'recipe3']); expect(result3.choices).toEqual({ languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 2 }, { name: 'lang11', count: 1 }, { name: 'lang2', count: 1 }, { name: 'lang3', count: 1 }, ], frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 3 }, { name: 'fw11', count: 1 }, { name: 'fw2', count: 2 }, ], tools: [ { name: 'tool1', count: 1 }, { name: 'tool2', count: 1 }, { name: 
'tool3', count: 1 }, ], }); }); test('filter recipes by language and framework', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const result1 = catalogManager.filterRecipes({ languages: ['lang2'], frameworks: ['fw2'], }); expect(result1.result.map(r => r.id)).toEqual(['recipe2']); expect(result1.choices).toEqual({ languages: [ { name: 'lang10', count: 1 }, { name: 'lang11', count: 1 }, { name: 'lang2', count: 1 }, { name: 'lang3', count: 1 }, ], frameworks: [ { name: 'fw10', count: 1 }, { name: 'fw2', count: 1 }, ], tools: [{ name: 'tool2', count: 1 }], }); }); test('filter recipes by language, tool and framework', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); await catalogManager.init(); await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1')); const result1 = catalogManager.filterRecipes({ languages: ['lang1'], tools: ['tool1'], frameworks: ['fw1'], }); expect(result1.result.map(r => r.id)).toEqual(['recipe1']); expect(result1.choices).toEqual({ languages: [ { name: 'lang1', count: 1 }, { name: 'lang10', count: 1 }, ], frameworks: [ { name: 'fw1', count: 1 }, { name: 'fw10', count: 1 }, ], tools: [{ name: 'tool1', count: 1 }], }); }); test('models are loaded as soon as init is finished when no user catalog', async () => { await catalogManager.init(); expect(catalogManager.getModels()).toHaveLength(3); }); test('models are loaded as soon as init is finished when user catalog exists', async () => { vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent)); vi.mocked(existsSync).mockReturnValue(true); await catalogManager.init(); expect(catalogManager.getModels()).toHaveLength(5); }); 
================================================
FILE: packages/backend/src/managers/catalogManager.ts
================================================
/**********************************************************************
 * Copyright (C) 2024-2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog';
import fs, { promises } from 'node:fs';
import path from 'node:path';
import crypto from 'node:crypto';
import defaultCatalog from '../assets/ai.json';
import type { Recipe } from '@shared/models/IRecipe';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { MSG_NEW_CATALOG_STATE } from '@shared/Messages';
import { type Disposable, type Event, EventEmitter, window } from '@podman-desktop/api';
import { JsonWatcher } from '../utils/JsonWatcher';
import { Publisher } from '../utils/Publisher';
import type { LocalModelImportInfo } from '@shared/models/ILocalModelInfo';
import { InferenceType } from '@shared/models/IInference';
import { CatalogFormat, hasCatalogWrongFormat, merge, sanitize } from '../utils/catalogUtils';
import type { FilterRecipesResult, RecipeChoices, RecipeFilters } from '@shared/models/FilterRecipesResult';
import type { RpcExtension } from '@shared/messages/MessageProxy';

export const USER_CATALOG = 'user-catalog.json';

/**
 * Maintains the application catalog (recipes, models, categories): the default
 * catalog shipped in assets/ai.json merged with the user's own catalog file,
 * which is watched on disk and re-merged on every change.
 *
 * NOTE(review): generic type arguments (e.g. Publisher<ApplicationCatalog>)
 * appear to have been stripped by the extraction; they are restored here based
 * on the payload passed to super() and fired on _onUpdate — confirm against the repo.
 */
export class CatalogManager extends Publisher<ApplicationCatalog> implements Disposable {
  private readonly _onUpdate = new EventEmitter<ApplicationCatalog>();
  readonly onUpdate: Event<ApplicationCatalog> = this._onUpdate.event;

  // Current merged catalog; starts empty so getters work before init() completes.
  private catalog: ApplicationCatalog;
  #jsonWatcher: JsonWatcher<ApplicationCatalog> | undefined;
  // Active error notification, if any; disposed once the catalog loads cleanly.
  #notification: Disposable | undefined;

  constructor(
    rpcExtension: RpcExtension,
    private appUserDirectory: string,
  ) {
    super(rpcExtension, MSG_NEW_CATALOG_STATE, () => this.getCatalog());
    // We start with an empty catalog, for the methods to work before the catalog is loaded
    this.catalog = {
      version: CatalogFormat.CURRENT,
      categories: [],
      models: [],
      recipes: [],
    };
  }

  /**
   * Start a watcher on the user catalog.json; resolves after the first
   * content update has been processed.
   */
  async init(): Promise<void> {
    return new Promise(resolve => {
      // Creating a json watcher
      this.#jsonWatcher = new JsonWatcher(this.getUserCatalogPath(), {
        version: CatalogFormat.CURRENT,
        recipes: [],
        models: [],
        categories: [],
      });
      this.#jsonWatcher.onContentUpdated(content => {
        this.onUserCatalogUpdate(content);
        resolve();
      });
      this.#jsonWatcher.init();
    });
  }

  /** Fall back to the bundled default catalog and publish it. */
  private loadDefaultCatalog(): void {
    this.catalog = defaultCatalog as ApplicationCatalog;
    this.notify();
  }

  /**
   * React to a (re)load of the user catalog file: sanitize old formats,
   * validate the version, merge with the default catalog, and surface any
   * problem through a single error notification.
   * @param content raw parsed content of the user catalog file
   */
  private onUserCatalogUpdate(content: unknown): void {
    // if there is no version in the user catalog, we try to sanitize it
    // most likely it can be converted automatically to the current version without showing any notification to the user
    if (content && typeof content === 'object' && hasCatalogWrongFormat(content)) {
      try {
        content = sanitize(content);
        // overwrite the catalog on disk (fire-and-forget; failure is only logged)
        const userCatalogPath = this.getUserCatalogPath();
        promises.writeFile(userCatalogPath, JSON.stringify(content, undefined, 2), 'utf-8').catch((err: unknown) => {
          console.error('Something went wrong while trying to save catalog', err);
        });
      } catch (e) {
        console.error(e);
      }
    }

    if (!content || typeof content !== 'object') {
      this.loadDefaultCatalog();
      return;
    }

    // Get the user-catalog version
    let userCatalogFormat: string = CatalogFormat.UNKNOWN;
    if ('version' in content && typeof content.version === 'string') {
      userCatalogFormat = content.version;
    }

    // Incompatible version: keep the default catalog and tell the user once.
    if (userCatalogFormat !== CatalogFormat.CURRENT) {
      this.loadDefaultCatalog();
      if (!this.#notification) {
        this.#notification = window.showNotification({
          type: 'error',
          title: 'Incompatible user-catalog',
          body: `The catalog is using an older version of the catalog incompatible with current version ${CatalogFormat.CURRENT}.`,
          markdownActions:
            ':button[See migration guide]{href=https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/MIGRATION.md title="Migration guide"}',
        });
      }
      console.error(
        `the user-catalog provided is using version ${userCatalogFormat} expected ${CatalogFormat.CURRENT}. You can follow the migration guide.`,
      );
      return;
    }

    // merging default catalog with user catalog
    try {
      this.catalog = merge(sanitize(defaultCatalog), sanitize({ ...content, version: userCatalogFormat }));
      // reset notification if everything went smoothly
      this.#notification?.dispose();
      this.#notification = undefined;
    } catch (err: unknown) {
      if (!this.#notification) {
        this.#notification = window.showNotification({
          type: 'error',
          title: 'Error loading the user catalog',
          body: `Something went wrong while trying to load the user catalog: ${String(err)}`,
        });
      }
      console.error(err);
      this.loadDefaultCatalog();
    }
    this.notify();
  }

  /** Publish the catalog over RPC and fire the local onUpdate event. */
  override notify(): void {
    super.notify();
    this._onUpdate.fire(this.getCatalog());
  }

  dispose(): void {
    this.#jsonWatcher?.dispose();
    this.#notification?.dispose();
  }

  public getCatalog(): ApplicationCatalog {
    return this.catalog;
  }

  public getModels(): ModelInfo[] {
    return this.catalog.models;
  }

  /** @throws Error when no model with the given id exists. */
  public getModelById(modelId: string): ModelInfo {
    const model = this.getModels().find(m => modelId === m.id);
    if (!model) {
      throw new Error(`No model found having id ${modelId}`);
    }
    return model;
  }

  /** @throws Error when no model with the given name exists. */
  public getModelByName(modelName: string): ModelInfo {
    const model = this.getModels().find(m => modelName === m.name);
    if (!model) {
      throw new Error(`No model found having name ${modelName}`);
    }
    return model;
  }

  public getRecipes(): Recipe[] {
    return this.catalog.recipes;
  }

  /** @throws Error when no recipe with the given id exists. */
  public getRecipeById(recipeId: string): Recipe {
    const recipe = this.getRecipes().find(r => recipeId === r.id);
    if (!recipe) {
      throw new Error(`No recipe found having id ${recipeId}`);
    }
    return recipe;
  }

  /**
   * Import the user's local models into the user catalog file on disk.
   * @param localModels the models to import
   */
  async importUserModels(localModels: LocalModelImportInfo[]): Promise<void> {
    const userCatalogPath = this.getUserCatalogPath();
    let content: ApplicationCatalog;

    // check if we already have an existing user's catalog
    if (fs.existsSync(userCatalogPath)) {
      const raw = await promises.readFile(userCatalogPath, 'utf-8');
      content = sanitize(JSON.parse(raw));
    } else {
      content = {
        version: CatalogFormat.CURRENT,
        recipes: [],
        models: [],
        categories: [],
      };
    }

    // Transform local models into ModelInfo
    const models: ModelInfo[] = await Promise.all(
      localModels.map(async local => {
        const statFile = await promises.stat(local.path);
        // id is a hash of the model's path string (not of the file content)
        const sha256 = crypto.createHash('sha256').update(local.path).digest('hex');
        return {
          id: sha256,
          name: local.name,
          description: `Model imported from ${local.path}`,
          file: {
            path: path.dirname(local.path),
            file: path.basename(local.path),
            size: statFile.size,
            creation: statFile.mtime,
          },
          memory: statFile.size,
          backend: local.backend ?? InferenceType.NONE,
        };
      }),
    );

    // Add all our models infos to the user's models catalog
    content.models.push(...models);

    // ensure parent directory exists
    await promises.mkdir(path.dirname(userCatalogPath), { recursive: true });

    // overwrite the existing catalog
    return promises.writeFile(userCatalogPath, JSON.stringify(content, undefined, 2), 'utf-8');
  }

  /**
   * Remove a model from the user's catalog file on disk.
   * @param modelId id of the model to remove
   * @throws Error when the user catalog file does not exist
   */
  async removeUserModel(modelId: string): Promise<void> {
    const userCatalogPath = this.getUserCatalogPath();
    if (!fs.existsSync(userCatalogPath)) {
      throw new Error('User catalog does not exist.');
    }

    const raw = await promises.readFile(userCatalogPath, 'utf-8');
    const content = sanitize(JSON.parse(raw));

    return promises.writeFile(
      userCatalogPath,
      JSON.stringify(
        {
          version: content.version,
          recipes: content.recipes,
          models: content.models.filter(model => model.id !== modelId),
          categories: content.categories,
        },
        undefined,
        2,
      ),
      'utf-8',
    );
  }

  /**
   * Return the path to the user catalog
   */
  private getUserCatalogPath(): string {
    return path.resolve(this.appUserDirectory, USER_CATALOG);
  }

  /**
   * Filter the catalog recipes with the provided filters, and compute for each
   * filter category the remaining choices together with the number of recipes
   * each choice would match (choices for an active category are computed by
   * re-filtering without that category, so the user can widen a selection).
   * @param filters active filters per category (languages, tools, frameworks)
   */
  public filterRecipes(filters: RecipeFilters): FilterRecipesResult {
    let result = this.getRecipes();
    for (const [filter, values] of Object.entries(filters)) {
      switch (filter) {
        case 'languages':
          // Keep recipes matching at least one selected language. Using a single
          // filter with `some` (instead of concatenating per-value matches) avoids
          // duplicating a recipe that matches several of the selected values,
          // and keeps the catalog's original recipe order.
          result = result.filter(r => values.some(value => r.languages?.includes(value)));
          break;
        case 'tools':
          result = result.filter(r => values.includes(r.backend ?? ''));
          break;
        case 'frameworks':
          // Same de-duplicating strategy as for languages.
          result = result.filter(r => values.some(value => r.frameworks?.includes(value)));
          break;
      }
    }

    const choices: RecipeChoices = {};

    if ('languages' in filters) {
      // languages filter is active: offer the choices available without it
      const subfilters = structuredClone(filters);
      delete subfilters.languages;
      choices.languages = this.filterRecipes(subfilters).choices.languages;
    } else {
      // unique, sorted languages of the remaining recipes, with match counts
      choices.languages = result
        .flatMap(r => r.languages)
        .filter(l => l !== undefined)
        .filter((value, index, array) => array.indexOf(value) === index)
        .sort((a, b) => a.localeCompare(b))
        .map(l => ({
          name: l,
          count: result.filter(r => r.languages?.includes(l)).length,
        }));
    }

    if ('tools' in filters) {
      const subfilters = structuredClone(filters);
      delete subfilters.tools;
      choices.tools = this.filterRecipes(subfilters).choices.tools;
    } else {
      choices.tools = result
        .map(r => r.backend)
        .filter(b => b !== undefined)
        .filter((value, index, array) => array.indexOf(value) === index)
        .sort((a, b) => a.localeCompare(b))
        .map(t => ({
          name: t,
          count: result.filter(r => r.backend === t).length,
        }));
    }

    if ('frameworks' in filters) {
      const subfilters = structuredClone(filters);
      delete subfilters.frameworks;
      choices.frameworks = this.filterRecipes(subfilters).choices.frameworks;
    } else {
      choices.frameworks = result
        .flatMap(r => r.frameworks)
        .filter(f => f !== undefined)
        .filter((value, index, array) => array.indexOf(value) === index)
        .sort((a, b) => a.localeCompare(b))
        .map(f => ({
          name: f,
          count: result.filter(r => r.frameworks?.includes(f)).length,
        }));
    }

    return {
      filters,
      choices,
      result,
    };
  }
}

================================================
FILE: packages/backend/src/managers/gitManager.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

// Unit tests for GitManager: isomorphic-git, node:fs and the Podman Desktop
// window API are all mocked; no real git operations are performed.
import { describe, expect, test, vi, beforeEach } from 'vitest';
import { GitManager } from './gitManager';
import { statSync, existsSync, mkdirSync, type Stats, rmSync } from 'node:fs';
import { window } from '@podman-desktop/api';
import type { ReadCommitResult } from 'isomorphic-git';
import git from 'isomorphic-git';

// Replace every isomorphic-git function the manager calls with a vi.fn().
vi.mock('isomorphic-git', () => {
  return {
    default: {
      clone: vi.fn(),
      currentBranch: vi.fn(),
      log: vi.fn(),
      resolveRef: vi.fn(),
      fetch: vi.fn(),
      getConfig: vi.fn(),
      statusMatrix: vi.fn(),
    },
  };
});

vi.mock('node:fs');

vi.mock('@podman-desktop/api', async () => {
  return {
    window: {
      showWarningMessage: vi.fn(),
    },
  };
});

beforeEach(() => {
  vi.resetAllMocks();
  // default HEAD commit used by the detached-HEAD tests below
  vi.mocked(git.resolveRef).mockResolvedValue('dummyCommit');
});

describe('cloneRepository', () => {
  const gitmanager = new GitManager();
  test('clone and checkout if ref is specified', async () => {
    await gitmanager.cloneRepository({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(git.clone).toBeCalledWith({
      fs: expect.anything(),
      http: expect.anything(),
      url: 'repo',
      dir: 'target',
      ref: '000',
      singleBranch: true,
      depth: 1,
    });
  });
  test('clone and checkout if ref is NOT specified', async () => {
    await gitmanager.cloneRepository({
      repository: 'repo',
      targetDirectory: 'target',
    });
    expect(git.clone).toBeCalledWith({
      fs: expect.anything(),
      http: expect.anything(),
      url: 'repo',
      dir: 'target',
      ref: undefined,
      singleBranch: true,
      depth: 1,
    });
  });
});

// processCheckout: behavior on fresh clone vs. an existing target folder,
// including each choice the user can make in the warning dialog.
describe('processCheckout', () => {
  test('first install no existing folder', async () => {
    vi.mocked(existsSync).mockReturnValue(false);
    await new GitManager().processCheckout({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(existsSync).toHaveBeenCalledWith('target');
    expect(mkdirSync).toHaveBeenCalledWith('target', { recursive: true });
    expect(git.clone).toBeCalledWith({
      fs: expect.anything(),
      http: expect.anything(),
      url: 'repo',
      dir: 'target',
      ref: '000',
      singleBranch: true,
      depth: 1,
    });
  });
  test('existing folder valid', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: true });
    await gitmanager.processCheckout({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(gitmanager.isRepositoryUpToDate).toHaveBeenCalled();
    expect(existsSync).toHaveBeenCalledWith('target');
    expect(statSync).toHaveBeenCalledWith('target');
    expect(mkdirSync).not.toHaveBeenCalled();
    expect(git.clone).not.toHaveBeenCalled();
  });
  test('existing folder detached and user cancel', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(window.showWarningMessage).mockResolvedValue('Cancel');
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
    await expect(
      gitmanager.processCheckout({
        repository: 'repo',
        targetDirectory: 'target',
        ref: '000',
      }),
    ).rejects.toThrowError('Cancelled');
  });
  test('existing folder not-updatable and user continue', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(window.showWarningMessage).mockResolvedValue('Continue');
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
    await gitmanager.processCheckout({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(rmSync).not.toHaveBeenCalled();
    expect(mkdirSync).not.toHaveBeenCalled();
    expect(git.clone).not.toHaveBeenCalled();
  });
  test('existing folder not-updatable and user reset', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(window.showWarningMessage).mockResolvedValue('Reset');
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
    await gitmanager.processCheckout({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(window.showWarningMessage).toHaveBeenCalledWith(expect.anything(), 'Cancel', 'Continue', 'Reset');
    expect(rmSync).toHaveBeenCalledWith('target', { recursive: true });
  });
  test('existing folder updatable and user update', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(window.showWarningMessage).mockResolvedValue('Update');
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: true });
    vi.spyOn(gitmanager, 'pull').mockResolvedValue(undefined);
    await gitmanager.processCheckout({
      repository: 'repo',
      targetDirectory: 'target',
      ref: '000',
    });
    expect(window.showWarningMessage).toHaveBeenCalledWith(expect.anything(), 'Cancel', 'Continue', 'Update');
    expect(rmSync).not.toHaveBeenCalled();
    expect(gitmanager.pull).toHaveBeenCalled();
  });
});

// isRepositoryUpToDate: every rejection reason (wrong remote, detached HEAD,
// dirty work tree, wrong tracking branch, ahead/behind) plus the happy paths.
describe('isRepositoryUpToDate', () => {
  test('no remote defined', async () => {
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'other-repo',
      },
    ]);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe(
      'The local repository does not have remote repo configured. Remotes: origin other-repo (fetch)',
    );
  });
  test('detached invalid without ref', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.mocked(git.currentBranch).mockResolvedValue(undefined);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository is detached.');
  });
  test('detached invalid with invalid ref', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue(undefined); // ref is not a tag
    vi.mocked(git.currentBranch).mockResolvedValue(undefined);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'invalidRef');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository is detached. HEAD is dummyCommit expected invalidRef.');
  });
  test('detached invalid with expected ref', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.mocked(git.statusMatrix).mockResolvedValue([['a', 1, 1, 1]]);
    vi.mocked(git.currentBranch).mockResolvedValue(undefined);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
    expect(result.ok).toBeTruthy();
    expect(result.error).toBeUndefined();
  });
  test('detached with expected ref and modified files', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    // statusMatrix row [file, HEAD, WORKDIR, STAGE]; WORKDIR=2 means modified
    vi.mocked(git.statusMatrix).mockResolvedValue([
      ['a', 1, 1, 1],
      ['a_file', 1, 2, 1],
    ]);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has modified files.');
  });
  test('detached with expected ref and deleted files', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.mocked(git.statusMatrix).mockResolvedValue([
      ['a', 1, 1, 1],
      ['a_file', 1, 0, 1],
    ]);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has deleted files.');
  });
  test('detached with expected ref and created files', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.mocked(git.statusMatrix).mockResolvedValue([
      ['a', 1, 1, 1],
      ['a_file', 0, 2, 2],
    ]);
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has created files.');
  });
  test('detached with expected ref and repository is not clean', async () => {
    vi.mocked(existsSync).mockReturnValue(true);
    vi.mocked(statSync).mockReturnValue({
      isDirectory: () => true,
    } as unknown as Stats);
    const gitmanager = new GitManager();
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: false,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository is not clean.');
  });
  test('using main branch and no local change', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeTruthy();
  });
  test('using main branch and tracking wrong branch', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/other-branch');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe(
      'The local repository is not tracking the right branch. (tracking origin/other-branch when expected main)',
    );
  });
  test('using main branch and ahead', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 1, ahead: 2 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has 2 commit(s) ahead.');
  });
  test('using main branch and behind', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 1, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeTruthy();
    expect(result.updatable).toBeTruthy();
  });
  test('using main branch and modified files', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: ['a_modified_file.txt'],
      created: [],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has modified files.');
  });
  test('using main branch and deleted files', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: ['a_deleted_file.txt'],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has deleted files.');
  });
  test('using main branch and created files', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: ['a_created_file.txt'],
      deleted: [],
      clean: true,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository has created files.');
  });
  test('using main branch and repository is not clean', async () => {
    const gitmanager = new GitManager();
    vi.mocked(git.currentBranch).mockResolvedValue('main');
    vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
      {
        remote: 'origin',
        url: 'repo',
      },
    ]);
    vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
    vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
    vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
      modified: [],
      created: [],
      deleted: [],
      clean: false,
    });
    const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
    expect(result.ok).toBeFalsy();
    expect(result.error).toBe('The local repository is not clean.');
  });
});

// NOTE(review): the four tests below sit outside the 'isRepositoryUpToDate'
// describe although the first two exercise that method — presumably an
// oversight when the suite grew; confirm before regrouping.
test('using tag and no local change', async () => {
  vi.mocked(existsSync).mockReturnValue(true);
  vi.mocked(statSync).mockReturnValue({
    isDirectory: () => true,
  } as unknown as Stats);
  const gitmanager = new GitManager();
  vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
    {
      remote: 'origin',
      url: 'repo',
    },
  ]);
  vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue('dummyCommit'); // ref is a tag and points to commit
  vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
    modified: [],
    created: [],
    deleted: [],
    clean: true,
  });
  vi.mocked(git.currentBranch).mockResolvedValue(undefined);
  const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'v1.0.0');
  expect(result.ok).toBeTruthy();
});
test('using wrong tag', async () => {
  vi.mocked(existsSync).mockReturnValue(true);
  vi.mocked(statSync).mockReturnValue({
    isDirectory: () => true,
  } as unknown as Stats);
  const gitmanager = new GitManager();
  vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
    {
      remote: 'origin',
      url: 'repo',
    },
  ]);
  vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue('otherCommit'); // ref is a tag and points to commit
  vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
    modified: [],
    created: [],
    deleted: [],
    clean: true,
  });
  vi.mocked(git.currentBranch).mockResolvedValue(undefined);
  const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'v1.0.0');
  expect(result.ok).toBeFalsy();
  expect(result.error).toBe('The local repository is detached. HEAD is dummyCommit expected otherCommit.');
});
test('getBehindAhead', async () => {
  const gitmanager = new GitManager();
  // local 'main' has commits 1,6,2,3; remote 'origin/main' has 1,4,2,5,3:
  // commit 6 is ahead (1), commits 4 and 5 are behind (2)
  vi.mocked(git.log).mockImplementation(async ({ ref }: { ref?: string }) => {
    return new Promise(resolve => {
      if (ref === 'main') {
        resolve([
          {
            oid: '1',
          },
          {
            oid: '6',
          },
          {
            oid: '2',
          },
          {
            oid: '3',
          },
        ] as ReadCommitResult[]);
      } else if (ref === 'origin/main') {
        resolve([
          {
            oid: '1',
          },
          {
            oid: '4',
          },
          {
            oid: '2',
          },
          {
            oid: '5',
          },
          {
            oid: '3',
          },
        ] as ReadCommitResult[]);
      } else {
        resolve([]);
      }
    });
  });
  vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
  const { behind, ahead } = await gitmanager.getBehindAhead('path/to/repo', 'main');
  expect(behind).toEqual(2);
  expect(ahead).toEqual(1);
});
test('getTrackingBranch', async () => {
  const gitmanager = new GitManager();
  // NOTE(review): return annotation reads bare 'Promise' — the type argument
  // (likely Promise<string>) appears lost in extraction; confirm against the repo.
  vi.mocked(git.getConfig).mockImplementation(async ({ path }: { path: string }): Promise => {
    if (path === 'branch.my-branch.remote') {
      return 'origin';
    } else if (path === 'branch.my-branch.merge') {
      return 'refs/heads/my-remote-branch';
    }
    throw new Error('should never been reached');
  });
  const result = await gitmanager.getTrackingBranch('path/to/repository', 'my-branch');
  expect(result).toEqual('origin/my-remote-branch');
});

================================================
FILE: packages/backend/src/managers/gitManager.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { window } from '@podman-desktop/api';
import fs, { statSync, existsSync, mkdirSync, rmSync } from 'node:fs';
import git from 'isomorphic-git';
import http from 'isomorphic-git/http/node';

// Describes a clone request: where from, which ref (optional), and where to.
export interface GitCloneInfo {
  repository: string;
  ref?: string;
  targetDirectory: string;
}

/**
 * Thin wrapper around isomorphic-git used to clone recipe repositories and to
 * verify that an already-cloned checkout still matches what a recipe expects.
 *
 * NOTE(review): several return annotations below read bare 'Promise' — the
 * type arguments (e.g. Promise<void>, Promise<string>) appear lost in
 * extraction; confirm against the repository.
 */
export class GitManager {
  // Shallow, single-branch clone of the given repository/ref.
  async cloneRepository(gitCloneInfo: GitCloneInfo): Promise {
    // clone repo
    await git.clone({
      fs,
      http,
      dir: gitCloneInfo.targetDirectory,
      url: gitCloneInfo.repository,
      ref: gitCloneInfo.ref,
      singleBranch: true,
      depth: 1,
    });
  }

  // List the remotes configured for the repository at `directory`.
  async getRepositoryRemotes(directory: string): Promise<
    {
      remote: string;
      url: string;
    }[]
  > {
    return git.listRemotes({ fs, dir: directory });
  }

  /* see https://isomorphic-git.org/docs/en/statusMatrix
   *
   * - The HEAD status is either absent (0) or present (1).
   * - The WORKDIR status is either absent (0), identical to HEAD (1), or different from HEAD (2).
   * - The STAGE status is either absent (0), identical to HEAD (1), identical to WORKDIR (2), or different from WORKDIR (3).
   *
   * // example StatusMatrix
   * [
   *   ["a.txt", 0, 2, 0], // new, untracked
   *   ["b.txt", 0, 2, 2], // added, staged
   *   ["c.txt", 0, 2, 3], // added, staged, with unstaged changes
   *   ["d.txt", 1, 1, 1], // unmodified
   *   ["e.txt", 1, 2, 1], // modified, unstaged
   *   ["f.txt", 1, 2, 2], // modified, staged
   *   ["g.txt", 1, 2, 3], // modified, staged, with unstaged changes
   *   ["h.txt", 1, 0, 1], // deleted, unstaged
   *   ["i.txt", 1, 0, 0], // deleted, staged
   *   ["j.txt", 1, 2, 0], // deleted, staged, with unstaged-modified changes (new file of the same name)
   *   ["k.txt", 1, 1, 0], // deleted, staged, with unstaged changes (new file of the same name)
   * ]
   */
  // Summarize the work tree against HEAD; `clean` is true only when every
  // tracked file is unmodified in both WORKDIR and STAGE.
  async getRepositoryStatus(directory: string): Promise<{
    modified: string[];
    created: string[];
    deleted: string[];
    clean: boolean;
  }> {
    const status = await git.statusMatrix({
      fs,
      dir: directory,
    });
    // column indices of a statusMatrix row (see table above)
    const FILE = 0,
      HEAD = 1,
      WORKDIR = 2,
      STAGE = 3;
    const created = status.filter(row => row[HEAD] === 0 && row[WORKDIR] === 2).map(row => row[FILE]);
    const deleted = status
      .filter(row => row[HEAD] === 1 && (row[WORKDIR] === 0 || row[STAGE] === 0))
      .map(row => row[FILE]);
    const modified = status.filter(row => row[HEAD] === 1 && row[WORKDIR] === 2).map(row => row[FILE]);
    const notClean = status.filter(row => row[HEAD] !== 1 || row[WORKDIR] !== 1 || row[STAGE] !== 1);
    return {
      modified,
      created,
      deleted,
      clean: notClean.length === 0,
    };
  }

  // Resolve HEAD to its commit id.
  async getCurrentCommit(directory: string): Promise {
    return git.resolveRef({ fs, dir: directory, ref: 'HEAD' });
  }

  async pull(directory: string): Promise {
    return git.pull({
      fs,
      http,
      dir: directory,
    });
  }

  /**
   * Ensure `targetDirectory` contains the expected checkout: reuse it when
   * up to date, otherwise let the user Cancel / Continue / Update / Reset,
   * and (re)clone when the directory is absent or was reset.
   * @throws Error('Cancelled') when the user cancels or dismisses the dialog
   */
  async processCheckout(gitCloneInfo: GitCloneInfo): Promise {
    // Check for existing cloned repository
    if (existsSync(gitCloneInfo.targetDirectory) && statSync(gitCloneInfo.targetDirectory).isDirectory()) {
      const result = await this.isRepositoryUpToDate(
        gitCloneInfo.targetDirectory,
        gitCloneInfo.repository,
        gitCloneInfo.ref,
      );
      if (result.ok) {
        return;
      }
      const error = `The repository "${gitCloneInfo.repository}" appears to have already been cloned and does not match the expected configuration: ${result.error}`;
      // Ask user
      const selected = await window.showWarningMessage(
        `${error} By continuing, the AI application may not run as expected. `,
        'Cancel',
        'Continue',
        result.updatable ? 'Update' : 'Reset',
      );
      switch (selected) {
        case undefined:
        case 'Cancel':
          throw new Error('Cancelled');
        case 'Continue':
          return;
        case 'Update':
          await this.pull(gitCloneInfo.targetDirectory);
          return;
        case 'Reset':
          // fall through to the clone below after wiping the directory
          rmSync(gitCloneInfo.targetDirectory, { recursive: true });
          break;
      }
    }
    // Create folder
    mkdirSync(gitCloneInfo.targetDirectory, { recursive: true });
    // Clone the repository
    console.log(`Cloning repository ${gitCloneInfo.repository} in ${gitCloneInfo.targetDirectory}.`);
    await this.cloneRepository(gitCloneInfo);
  }

  /**
   * Check whether the checkout at `directory` matches `origin`/`ref`.
   * Returns { ok: true } when usable as-is, { ok: true, updatable: true }
   * when only behind its tracking branch, or { error } describing the mismatch.
   */
  async isRepositoryUpToDate(
    directory: string,
    origin: string,
    ref?: string,
  ): Promise<{ ok?: boolean; updatable?: boolean; error?: string }> {
    // fetch updates
    await git.fetch({
      fs,
      http,
      dir: directory,
    });
    const remotes = await this.getRepositoryRemotes(directory);
    if (!remotes.some(remote => remote.url === origin)) {
      return {
        error: `The local repository does not have remote ${origin} configured. Remotes: ${remotes
          .map(remote => `${remote.remote} ${remote.url} (fetch)`)
          .join(',')}`,
      };
    }
    const branch = await git.currentBranch({
      fs,
      dir: directory,
    });
    if (!branch) {
      // when the repository is detached
      if (ref === undefined) {
        return { error: 'The local repository is detached.' };
      } else {
        // ref may be a tag: compare HEAD against the commit the tag points to
        const tag = await this.getTagCommitId(directory, ref);
        if (tag) {
          ref = tag;
        }
        const commit = await this.getCurrentCommit(directory);
        if (!commit.startsWith(ref)) {
          return { error: `The local repository is detached. HEAD is ${commit} expected ${ref}.` };
        }
      }
    }
    if (branch) {
      const tracking = await this.getTrackingBranch(directory, branch);
      if (ref && tracking !== `origin/${ref}`) {
        return {
          error: `The local repository is not tracking the right branch. (tracking ${tracking} when expected ${ref})`,
        };
      }
      const { behind, ahead } = await this.getBehindAhead(directory, branch);
      if (ahead !== 0) {
        return { error: `The local repository has ${ahead} commit(s) ahead.` };
      }
      if (behind !== 0) {
        // only behind: the caller may offer to pull
        return { ok: true, updatable: true };
      }
    }
    const status = await this.getRepositoryStatus(directory);
    if (status.modified.length > 0) {
      return { error: 'The local repository has modified files.' };
    } else if (status.created.length > 0) {
      return { error: 'The local repository has created files.' };
    } else if (status.deleted.length > 0) {
      return { error: 'The local repository has deleted files.' };
    } else if (!status.clean) {
      return { error: 'The local repository is not clean.' };
    }
    return { ok: true }; // If none of the error conditions are met
  }

  // Resolve the remote branch that `branch` tracks (e.g. 'origin/main').
  // (truncated in this view: the ternary's branches continue past the chunk)
  async getTrackingBranch(directory: string, branch: string): Promise {
    const mergeRef = await git.getConfig({
      fs,
      dir: directory,
      path: `branch.${branch}.merge`,
    });
    const remote = await git.getConfig({
      fs,
      dir: directory,
      path: `branch.${branch}.remote`,
    });
    return mergeRef && remote ?
`${remote}/${mergeRef.replace(/^refs\/heads\//, '')}` : undefined; } async getBehindAhead(dir: string, localBranch: string): Promise<{ behind: number; ahead: number }> { const remoteBranch = await this.getTrackingBranch(dir, localBranch); const remoteCommits = ( await git.log({ fs, dir, ref: remoteBranch, }) ) .map(c => c.oid) .sort((a, b) => a.localeCompare(b)); const localCommits = ( await git.log({ fs, dir, ref: localBranch, }) ) .map(c => c.oid) .sort((a, b) => a.localeCompare(b)); let behind = 0; let ahead = 0; while (remoteCommits.length && localCommits.length) { const remote = remoteCommits.pop(); const local = localCommits.pop(); if (!remote || !local) { break; } if (remote === local) { continue; } if (remote > local) { behind++; localCommits.push(local); } else { ahead++; remoteCommits.push(remote); } } return { behind: behind + remoteCommits.length, ahead: ahead + localCommits.length, }; } async getTagCommitId(directory: string, tagName: string): Promise { try { return await git.resolveRef({ fs, dir: directory, ref: tagName, }); } catch { return undefined; } } } ================================================ FILE: packages/backend/src/managers/inference/inferenceManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { containerEngine, type ContainerInfo, type ContainerInspectInfo, type TelemetryLogger, } from '@podman-desktop/api'; import type { ContainerRegistry } from '../../registries/ContainerRegistry'; import type { PodmanConnection } from '../podmanConnection'; import { beforeEach, describe, expect, test, vi } from 'vitest'; import { InferenceManager } from './inferenceManager'; import type { ModelsManager } from '../modelsManager'; import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import type { InferenceProviderRegistry } from '../../registries/InferenceProviderRegistry'; import type { InferenceProvider } from '../../workers/provider/InferenceProvider'; import type { CatalogManager } from '../catalogManager'; import type { InferenceServer } from '@shared/models/IInference'; import { InferenceType } from '@shared/models/IInference'; import { VMType } from '@shared/models/IPodman'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_INFERENCE_SERVERS_UPDATE } from '@shared/Messages'; import * as randomUtils from '../../utils/randomUtils'; import type { Task } from '@shared/models/ITask'; vi.mock('@podman-desktop/api', async () => { return { containerEngine: { startContainer: vi.fn(), stopContainer: vi.fn(), inspectContainer: vi.fn(), deleteContainer: vi.fn(), listContainers: vi.fn(), }, Disposable: { from: vi.fn(), create: vi.fn(), }, }; }); vi.mock('../../utils/randomUtils'); const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension; const containerRegistryMock = { onStartContainerEvent: vi.fn(), subscribe: vi.fn(), } as unknown as ContainerRegistry; const podmanConnectionMock = { onPodmanConnectionEvent: vi.fn(), 
findRunningContainerProviderConnection: vi.fn(), } as unknown as PodmanConnection; const modelsManager = { getLocalModelPath: vi.fn(), uploadModelToPodmanMachine: vi.fn(), } as unknown as ModelsManager; const telemetryMock = { logUsage: vi.fn(), logError: vi.fn(), } as unknown as TelemetryLogger; const taskRegistryMock = { createTask: vi.fn(), updateTask: vi.fn(), getTasksByLabels: vi.fn(), } as unknown as TaskRegistry; const inferenceProviderRegistryMock = { getAll: vi.fn(), getByType: vi.fn(), get: vi.fn(), } as unknown as InferenceProviderRegistry; const catalogManager = { onUpdate: vi.fn(), } as unknown as CatalogManager; const getInitializedInferenceManager = async (): Promise => { const manager = new InferenceManager( rpcExtensionMock, containerRegistryMock, podmanConnectionMock, modelsManager, telemetryMock, taskRegistryMock, inferenceProviderRegistryMock, catalogManager, ); manager.init(); await vi.waitUntil(manager.isInitialize.bind(manager), { interval: 200, timeout: 2000, }); return manager; }; const mockListContainers = (containers: Partial[]): void => { vi.mocked(containerEngine.listContainers).mockResolvedValue(containers as unknown as ContainerInfo[]); }; beforeEach(() => { vi.resetAllMocks(); // Default listContainers is empty mockListContainers([]); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'running', Health: undefined, }, } as unknown as ContainerInspectInfo); vi.mocked(podmanConnectionMock.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(taskRegistryMock.getTasksByLabels).mockReturnValue([]); vi.mocked(modelsManager.getLocalModelPath).mockReturnValue('/local/model.guff'); vi.mocked(modelsManager.uploadModelToPodmanMachine).mockResolvedValue('/mnt/path/model.guff'); }); /** * Testing the 
initialization of the manager */ describe('init Inference Manager', () => { test('should be initialized without catalog events', async () => { const manager = new InferenceManager( rpcExtensionMock, containerRegistryMock, podmanConnectionMock, modelsManager, telemetryMock, taskRegistryMock, inferenceProviderRegistryMock, catalogManager, ); manager.init(); await vi.waitUntil(manager.isInitialize.bind(manager), { interval: 200, timeout: 2000, }); }); test('should have listed containers', async () => { const inferenceManager = await getInitializedInferenceManager(); expect(inferenceManager.isInitialize()).toBeTruthy(); expect(containerEngine.listContainers).toHaveBeenCalled(); }); test('should ignore containers without the proper label', async () => { mockListContainers([ { Id: 'dummyId', }, ]); const inferenceManager = await getInitializedInferenceManager(); expect(inferenceManager.getServers().length).toBe(0); }); test('should have adopted the existing container', async () => { mockListContainers([ { Id: 'dummyContainerId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const inferenceManager = await getInitializedInferenceManager(); expect(inferenceManager.getServers()).toStrictEqual([ { connection: { port: -1, }, container: { containerId: 'dummyContainerId', engineId: 'dummyEngineId', }, health: undefined, models: [], status: 'running', type: expect.anything(), labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); }); test('should have adopted all existing container with proper label', async () => { mockListContainers([ { Id: 'dummyContainerId-1', engineId: 'dummyEngineId-1', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, { Id: 'dummyContainerId-2', engineId: 'dummyEngineId-2', }, { Id: 'dummyContainerId-3', engineId: 'dummyEngineId-3', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const inferenceManager = await getInitializedInferenceManager(); const servers = inferenceManager.getServers(); expect(servers.length).toBe(2); 
expect(servers.some(server => server.container.containerId === 'dummyContainerId-1')).toBeTruthy(); expect(servers.some(server => server.container.containerId === 'dummyContainerId-3')).toBeTruthy(); }); }); /** * Testing the creation logic */ describe('Create Inference Server', () => { test('no provider available should throw an error', async () => { vi.mocked(inferenceProviderRegistryMock.getByType).mockReturnValue([]); const inferenceManager = await getInitializedInferenceManager(); await expect( inferenceManager.createInferenceServer({ inferenceProvider: undefined, labels: {}, modelsInfo: [], port: 8888, }), ).rejects.toThrowError('no enabled provider could be found.'); }); test('inference provider provided should use get from InferenceProviderRegistry', async () => { vi.mocked(inferenceProviderRegistryMock.get).mockReturnValue({ enabled: () => false, } as unknown as InferenceProvider); const inferenceManager = await getInitializedInferenceManager(); await expect( inferenceManager.createInferenceServer({ inferenceProvider: 'dummy-inference-provider', labels: {}, modelsInfo: [], port: 8888, }), ).rejects.toThrowError('provider requested is not enabled.'); expect(inferenceProviderRegistryMock.get).toHaveBeenCalledWith('dummy-inference-provider'); }); test('selected inference provider should receive config', async () => { const provider: InferenceProvider = { enabled: () => true, name: 'dummy-inference-provider', dispose: () => {}, prePerform: vi.fn().mockReturnValue(Promise.resolve()), perform: vi.fn<() => InferenceServer>().mockResolvedValue({ container: { containerId: 'dummy-container-id', engineId: 'dummy-engine-id', }, models: [], status: 'running', type: InferenceType.LLAMA_CPP, connection: { port: 0 }, labels: {}, }), } as unknown as InferenceProvider; vi.mocked(inferenceProviderRegistryMock.get).mockReturnValue(provider); const inferenceManager = await getInitializedInferenceManager(); const config: InferenceServerConfig = { inferenceProvider: 
'dummy-inference-provider', labels: {}, modelsInfo: [], port: 8888, }; const result = await inferenceManager.createInferenceServer(config); expect(provider.perform).toHaveBeenCalledWith(config); expect(result).toBe('dummy-container-id'); }); }); /** * Testing the starting logic */ describe('Start Inference Server', () => { test('containerId unknown', async () => { const inferenceManager = await getInitializedInferenceManager(); await expect(inferenceManager.startInferenceServer('unknownContainerId')).rejects.toThrowError( 'cannot find a corresponding server for container id unknownContainerId.', ); }); test('valid containerId', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.startInferenceServer('dummyId'); expect(containerEngine.startContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId'); const servers = inferenceManager.getServers(); expect(servers.length).toBe(1); expect(servers[0].status).toBe('running'); }); }); /** * Testing the stopping logic */ describe('Stop Inference Server', () => { test('containerId unknown', async () => { const inferenceManager = await getInitializedInferenceManager(); await expect(inferenceManager.stopInferenceServer('unknownContainerId')).rejects.toThrowError( 'cannot find a corresponding server for container id unknownContainerId.', ); }); test('valid containerId', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.stopInferenceServer('dummyId'); expect(containerEngine.stopContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId'); const servers = inferenceManager.getServers(); expect(servers.length).toBe(1); expect(servers[0].status).toBe('stopped'); }); }); describe('Delete Inference Server', 
() => { test('containerId unknown', async () => { const inferenceManager = await getInitializedInferenceManager(); await expect(inferenceManager.deleteInferenceServer('unknownContainerId')).rejects.toThrowError( 'cannot find a corresponding server for container id unknownContainerId.', ); }); test('valid running containerId', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.deleteInferenceServer('dummyId'); expect(containerEngine.stopContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId'); expect(containerEngine.deleteContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId'); const servers = inferenceManager.getServers(); expect(servers.length).toBe(0); }); test('valid stopped containerId', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'stopped', Health: undefined, }, } as unknown as ContainerInspectInfo); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.deleteInferenceServer('dummyId'); expect(containerEngine.stopContainer).not.toHaveBeenCalled(); expect(containerEngine.deleteContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId'); const servers = inferenceManager.getServers(); expect(servers.length).toBe(0); }); }); describe('Request Create Inference Server', () => { beforeEach(() => { vi.mocked(randomUtils.getRandomString).mockReturnValue('random123'); }); test('Should return unique string identifier', async () => { const inferenceManager = await getInitializedInferenceManager(); const identifier = inferenceManager.requestCreateInferenceServer({ port: 8888, providerId: 'test@providerId', image: 'quay.io/bootsy/playground:v0', modelsInfo: [ { id: 'dummyModelId', file: { file: 
'dummyFile', path: 'dummyPath', }, }, ], } as unknown as InferenceServerConfig); expect(identifier).toBeDefined(); expect(typeof identifier).toBe('string'); }); test('Task registry should have tasks matching unique identifier provided', async () => { const inferenceManager = await getInitializedInferenceManager(); const identifier = inferenceManager.requestCreateInferenceServer({ port: 8888, providerId: 'test@providerId', image: 'quay.io/bootsy/playground:v0', modelsInfo: [ { id: 'dummyModelId', file: { file: 'dummyFile', path: 'dummyPath', }, }, ], } as unknown as InferenceServerConfig); expect(taskRegistryMock.createTask).toHaveBeenNthCalledWith(1, 'Creating Inference server', 'loading', { trackingId: identifier, }); }); test('all children tasks should be set as error when one fails', async () => { const inferenceManager = await getInitializedInferenceManager(); vi.mocked(taskRegistryMock.createTask).mockReturnValue({ id: 'task1', name: 'Task 1', state: 'loading', }); vi.spyOn(inferenceManager, 'createInferenceServer'); const otherTasks: Task[] = [ { id: 'subtask1', name: 'Sub task 1', state: 'loading', }, { id: 'subtask2', name: 'Sub task 2', state: 'loading', }, { id: 'subtask3', name: 'Sub task 3', state: 'error', }, ]; vi.mocked(taskRegistryMock.getTasksByLabels).mockReturnValue(otherTasks); vi.mocked(inferenceManager.createInferenceServer).mockRejectedValue('an error'); inferenceManager.requestCreateInferenceServer({ port: 8888, providerId: 'test@providerId', image: 'quay.io/bootsy/playground:v0', modelsInfo: [ { id: 'dummyModelId', file: { file: 'dummyFile', path: 'dummyPath', }, }, ], } as unknown as InferenceServerConfig); await vi.waitFor(() => { expect(taskRegistryMock.updateTask).toHaveBeenCalledTimes(3); }); expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(1, { ...otherTasks[0], state: 'error' }); expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(2, { ...otherTasks[1], state: 'error' }); 
expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(3, { error: 'Something went wrong while trying to create an inference server an error.', id: 'task1', name: 'Task 1', state: 'error', }); }); }); describe('containerRegistry events', () => { test('container die event', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const disposableMock = vi.fn(); const deferred = new Promise<(status: string) => void>((resolve, reject) => { vi.mocked(containerRegistryMock.subscribe).mockImplementation((containerId, listener) => { if (containerId !== 'dummyId') reject(new Error('invalid container id')); else resolve(listener); return { dispose: disposableMock, }; }); }); const inferenceManager = await getInitializedInferenceManager(); const listener = await deferred; const server = inferenceManager.get('dummyId'); expect(server?.status).toBe('running'); expect(containerEngine.inspectContainer).toHaveBeenCalledOnce(); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'stopped', Health: undefined, }, } as unknown as ContainerInspectInfo); listener('die'); await vi.waitFor(() => { expect(inferenceManager.get('dummyId')?.status).toBe('stopped'); expect(containerEngine.inspectContainer).toHaveBeenCalledTimes(2); }); // we should not have disposed the subscriber, as the container is only stopped, not removed expect(disposableMock).not.toHaveBeenCalled(); }); test('container remove event', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); const disposableMock = vi.fn(); const deferred = new Promise<(status: string) => void>((resolve, reject) => { vi.mocked(containerRegistryMock.subscribe).mockImplementation((containerId, listener) => { if (containerId !== 'dummyId') reject(new Error('invalid container id')); else resolve(listener); return { dispose: disposableMock, }; }); }); const 
inferenceManager = await getInitializedInferenceManager(); const listener = await deferred; const server = inferenceManager.get('dummyId'); expect(server?.status).toBe('running'); listener('remove'); await vi.waitFor(() => { expect(inferenceManager.get('dummyId')).toBeUndefined(); }); // we should have disposed the subscriber, as the container is removed expect(disposableMock).toHaveBeenCalled(); }); }); describe('transition statuses', () => { test('stopping an inference server should first set status to stopping', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'running', Health: undefined, }, } as unknown as ContainerInspectInfo); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.stopInferenceServer('dummyId'); // first called with stopping status expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [ { connection: expect.anything(), container: expect.anything(), models: expect.anything(), health: undefined, status: 'stopping', type: expect.anything(), labels: expect.anything(), }, ]); // finally have been called with status stopped expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [ { connection: expect.anything(), container: expect.anything(), models: expect.anything(), health: undefined, status: 'stopped', type: expect.anything(), labels: expect.anything(), }, ]); }); test('deleting an inference server should first set status to stopping', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'running', Health: undefined, }, } as unknown as ContainerInspectInfo); const inferenceManager = await getInitializedInferenceManager(); await 
inferenceManager.deleteInferenceServer('dummyId'); expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [ { connection: expect.anything(), container: expect.anything(), models: expect.anything(), health: undefined, status: 'deleting', type: expect.anything(), labels: expect.anything(), }, ]); }); test('starting an inference server should first set status to stopping', async () => { mockListContainers([ { Id: 'dummyId', engineId: 'dummyEngineId', Labels: { [LABEL_INFERENCE_SERVER]: '[]', }, }, ]); vi.mocked(containerEngine.inspectContainer).mockResolvedValue({ State: { Status: 'stopped', Health: undefined, }, } as unknown as ContainerInspectInfo); const inferenceManager = await getInitializedInferenceManager(); await inferenceManager.startInferenceServer('dummyId'); // first status must be set to starting expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [ { connection: expect.anything(), container: expect.anything(), models: expect.anything(), health: undefined, status: 'starting', type: expect.anything(), labels: expect.anything(), }, ]); // on success it should have been set to running expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [ { connection: expect.anything(), container: expect.anything(), models: expect.anything(), health: undefined, status: 'running', type: expect.anything(), labels: expect.anything(), }, ]); }); }); ================================================ FILE: packages/backend/src/managers/inference/inferenceManager.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { InferenceServer, InferenceServerStatus, InferenceType } from '@shared/models/IInference'; import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection'; import { containerEngine, Disposable } from '@podman-desktop/api'; import type { ContainerInfo, TelemetryLogger, ContainerProviderConnection } from '@podman-desktop/api'; import type { ContainerRegistry, ContainerEvent } from '../../registries/ContainerRegistry'; import { getInferenceType, isTransitioning, LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import { Publisher } from '../../utils/Publisher'; import { MSG_INFERENCE_SERVERS_UPDATE } from '@shared/Messages'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import type { ModelsManager } from '../modelsManager'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { getRandomString } from '../../utils/randomUtils'; import { basename, dirname } from 'node:path'; import type { InferenceProviderRegistry } from '../../registries/InferenceProviderRegistry'; import type { InferenceProvider } from '../../workers/provider/InferenceProvider'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { CatalogManager } from '../catalogManager'; import { getHash } from '../../utils/sha'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { TaskRunner } from '../TaskRunner'; export class InferenceManager extends Publisher 
implements Disposable { // Inference server map (containerId -> InferenceServer) #servers: Map; // Is initialized #initialized: boolean; // Disposables #disposables: Disposable[]; #taskRunner: TaskRunner; constructor( rpcExtension: RpcExtension, private containerRegistry: ContainerRegistry, private podmanConnection: PodmanConnection, private modelsManager: ModelsManager, private telemetry: TelemetryLogger, private taskRegistry: TaskRegistry, private inferenceProviderRegistry: InferenceProviderRegistry, private catalogManager: CatalogManager, ) { super(rpcExtension, MSG_INFERENCE_SERVERS_UPDATE, () => this.getServers()); this.#servers = new Map(); this.#disposables = []; this.#initialized = false; this.#taskRunner = new TaskRunner(this.taskRegistry); } init(): void { this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this)); this.containerRegistry.onStartContainerEvent(this.watchContainerStart.bind(this)); this.catalogManager.onUpdate(() => { this.retryableRefresh(1); }); this.retryableRefresh(3); } public isInitialize(): boolean { return this.#initialized; } /** * Cleanup the manager */ dispose(): void { this.cleanDisposables(); this.#servers.clear(); this.#initialized = false; } /** * Clean class disposables */ private cleanDisposables(): void { this.#disposables.forEach(disposable => disposable.dispose()); } /** * Get the Inference servers */ public getServers(): InferenceServer[] { return Array.from(this.#servers.values()); } /** * Get the Unique registered Inference provider types */ public getRegisteredProviders(): InferenceType[] { const types: InferenceType[] = this.inferenceProviderRegistry.getAll().map(provider => provider.type); return [...new Set(types)]; } /** * return an inference server * @param containerId the containerId of the inference server */ public get(containerId: string): InferenceServer | undefined { return this.#servers.get(containerId); } /** * return the first inference server which is using the specific model * it 
throws if the model backend is not currently supported */ public findServerByModel(model: ModelInfo): InferenceServer | undefined { // check if model backend is supported const backend: InferenceType = getInferenceType([model]); const providers: InferenceProvider[] = this.inferenceProviderRegistry .getByType(backend) .filter(provider => provider.enabled()); if (providers.length === 0) { throw new Error('no enabled provider could be found.'); } return this.getServers().find(s => s.models.some(m => m.id === model.id)); } /** * Creating an inference server can be heavy task (pulling image, uploading model to WSL etc.) * The frontend cannot wait endlessly, therefore we provide a method returning a tracking identifier * that can be used to fetch the tasks * * @param config the config to use to create the inference server * * @return a unique tracking identifier to follow the creation request */ requestCreateInferenceServer(config: InferenceServerConfig): string { // create a tracking id to put in the labels const trackingId: string = getRandomString(); config.labels = { ...config.labels, trackingId: trackingId, }; this.#taskRunner .runAsTask( { trackingId: trackingId, }, { loadingLabel: 'Creating Inference server', errorMsg: err => `Something went wrong while trying to create an inference server ${String(err)}.`, failFastSubtasks: true, }, async ({ updateLabels }) => { const containerId = await this.createInferenceServer(config); updateLabels(labels => ({ ...labels, containerId })); }, ) .catch(() => {}); return trackingId; } /** * Given an engineId, it will create an inference server using an InferenceProvider. 
* @param config * * @return the containerId of the created inference server */ async createInferenceServer(config: InferenceServerConfig): Promise { if (!this.isInitialize()) throw new Error('Cannot start the inference server: not initialized.'); // Get the backend for the model inference server {@link InferenceType} const backend: InferenceType = getInferenceType(config.modelsInfo); let provider: InferenceProvider; if (config.inferenceProvider) { provider = this.inferenceProviderRegistry.get(config.inferenceProvider); if (!provider.enabled()) throw new Error('provider requested is not enabled.'); } else { const providers: InferenceProvider[] = this.inferenceProviderRegistry .getByType(backend) .filter(provider => provider.enabled()); if (providers.length === 0) throw new Error('no enabled provider could be found.'); provider = providers[0]; } let connection: ContainerProviderConnection | undefined = undefined; if (config.connection) { connection = this.podmanConnection.getContainerProviderConnection(config.connection); } else { connection = this.podmanConnection.findRunningContainerProviderConnection(); } if (!connection) throw new Error('cannot find running container provider connection'); await provider.prePerform(config); // upload models to podman machine if user system is supported config.modelsInfo = await Promise.all( config.modelsInfo.map(modelInfo => this.modelsManager.uploadModelToPodmanMachine(connection, modelInfo, config.labels).then(path => ({ ...modelInfo, file: { path: dirname(path), file: basename(path), }, })), ), ); // create the inference server using the selected inference provider const inferenceServer = await provider.perform(config); // Adding a new inference server this.#servers.set(inferenceServer.container.containerId, inferenceServer); // Watch for container changes this.watchContainerStatus(inferenceServer.container.engineId, inferenceServer.container.containerId); // Log usage this.telemetry.logUsage('inference.start', { models: 
config.modelsInfo.map(model => getHash(model.id)), }); this.notify(); return inferenceServer.container.containerId; } /** * Given an engineId and a containerId, inspect the container and update the servers * @param engineId * @param containerId * @private */ private updateServerStatus(engineId: string, containerId: string): void { const server = this.#servers.get(containerId); if (server === undefined) throw new Error('Something went wrong while trying to get container status got undefined Inference Server.'); // we should not update the server while we are in a transition state. if (isTransitioning(server)) return; // Inspect container containerEngine .inspectContainer(engineId, containerId) .then(result => { // Update server this.#servers.set(containerId, { ...server, status: result.State.Status === 'running' ? 'running' : 'stopped', health: result.State.Health, }); this.notify(); }) .catch((err: unknown) => { console.error( `Something went wrong while trying to inspect container ${containerId}. 
Trying to refresh servers.`, err, ); this.retryableRefresh(2); }); } /** * Watch for container status changes * @param engineId * @param containerId the container to watch out */ private watchContainerStatus(engineId: string, containerId: string): void { // Update now this.updateServerStatus(engineId, containerId); // Create a pulling update for container health check const intervalId = setInterval(this.updateServerStatus.bind(this, engineId, containerId), 10000); this.#disposables.push( Disposable.create(() => { clearInterval(intervalId); }), ); // Subscribe to container status update const disposable = this.containerRegistry.subscribe(containerId, (status: string) => { switch (status) { case 'die': this.updateServerStatus(engineId, containerId); clearInterval(intervalId); break; case 'remove': // Update the list of servers this.removeInferenceServer(containerId); disposable.dispose(); clearInterval(intervalId); break; } }); // Allowing cleanup if extension is stopped this.#disposables.push(disposable); } private watchMachineEvent(_event: PodmanConnectionEvent): void { this.retryableRefresh(2); } /** * Listener for container start events * @param event the event containing the id of the container */ private watchContainerStart(event: ContainerEvent): void { // We might have a start event for an inference server we already know about if (this.#servers.has(event.id)) return; containerEngine .listContainers() .then(containers => { const container = containers.find(c => c.Id === event.id); if (container === undefined) { return; } if (container.Labels && LABEL_INFERENCE_SERVER in container.Labels) { this.watchContainerStatus(container.engineId, container.Id); } }) .catch((err: unknown) => { console.error(`Something went wrong in container start listener.`, err); }); } /** * This non-async utility method is made to retry refreshing the inference server with some delay * in case of error raised. 
* * @param retry the number of retry allowed */ private retryableRefresh(retry: number = 3): void { if (retry === 0) { console.error('Cannot refresh inference servers: retry limit has been reached. Cleaning manager.'); this.cleanDisposables(); this.#servers.clear(); this.#initialized = false; return; } this.refreshInferenceServers().catch((err: unknown): void => { console.warn(`Something went wrong while trying to refresh inference server. (retry left ${retry})`, err); setTimeout( () => { this.retryableRefresh(retry - 1); }, // eslint-disable-next-line sonarjs/pseudo-random 2000 + Math.random() * 1000, ); }); } /** * Refresh the inference servers by listing all containers. * * This method has an important impact as it (re-)create all inference servers */ private async refreshInferenceServers(): Promise { const containers: ContainerInfo[] = await containerEngine.listContainers(); const filtered = containers.filter(c => c.Labels && LABEL_INFERENCE_SERVER in c.Labels); // clean existing disposables this.cleanDisposables(); this.#servers = new Map( filtered.map(containerInfo => { let modelInfos: ModelInfo[] = []; try { const modelIds: string[] = JSON.parse(containerInfo.Labels[LABEL_INFERENCE_SERVER]); modelInfos = modelIds .filter(id => this.modelsManager.isModelOnDisk(id)) .map(id => this.modelsManager.getModelInfo(id)); } catch (err: unknown) { console.error('Something went wrong while getting the models ids from the label.', err); } return [ containerInfo.Id, { container: { containerId: containerInfo.Id, engineId: containerInfo.engineId, }, connection: { port: !!containerInfo.Ports && containerInfo.Ports.length > 0 ? containerInfo.Ports[0].PublicPort : -1, }, status: containerInfo.Status === 'running' ? 
'running' : 'stopped', models: modelInfos, type: getInferenceType(modelInfos), labels: containerInfo.Labels || {}, }, ]; }), ); // (re-)create container watchers this.#servers.forEach(server => this.watchContainerStatus(server.container.engineId, server.container.containerId)); this.#initialized = true; // notify update this.notify(); } /** * Remove the reference of the inference server * /!\ Does not delete the corresponding container * @param containerId */ private removeInferenceServer(containerId: string): void { this.#servers.delete(containerId); this.notify(); } /** * Delete the InferenceServer instance from #servers and matching container * @param containerId the id of the container running the Inference Server */ async deleteInferenceServer(containerId: string): Promise { const server = this.#servers.get(containerId); if (!server) { throw new Error(`cannot find a corresponding server for container id ${containerId}.`); } try { // Set status a deleting this.setInferenceServerStatus(server.container.containerId, 'deleting'); // If the server is running we need to stop it. 
if (server.status === 'running') { await containerEngine.stopContainer(server.container.engineId, server.container.containerId); } // Delete the container await containerEngine.deleteContainer(server.container.engineId, server.container.containerId); // Delete the reference this.removeInferenceServer(containerId); } catch (err: unknown) { console.error('Something went wrong while trying to delete the inference server.', err); this.setInferenceServerStatus(server.container.containerId, 'error'); this.retryableRefresh(2); } } /** * Start an inference server from the container id * @param containerId the identifier of the container to start */ async startInferenceServer(containerId: string): Promise { if (!this.isInitialize()) throw new Error('Cannot start the inference server.'); const server = this.#servers.get(containerId); if (server === undefined) throw new Error(`cannot find a corresponding server for container id ${containerId}.`); try { // set status to starting this.setInferenceServerStatus(server.container.containerId, 'starting'); await containerEngine.startContainer(server.container.engineId, server.container.containerId); this.setInferenceServerStatus(server.container.containerId, 'running'); // start watch for container status update this.watchContainerStatus(server.container.engineId, server.container.containerId); } catch (error: unknown) { console.error(error); this.telemetry.logError('inference.start', { message: 'error starting inference', error: error, }); this.setInferenceServerStatus(server.container.containerId, 'error'); this.retryableRefresh(1); } } /** * Stop an inference server from the container id * @param containerId the identifier of the container to stop */ async stopInferenceServer(containerId: string): Promise { if (!this.isInitialize()) throw new Error('Cannot stop the inference server.'); const server = this.#servers.get(containerId); if (server === undefined) throw new Error(`cannot find a corresponding server for container id 
${containerId}.`); if (isTransitioning(server)) throw new Error(`cannot stop a transitioning server.`); try { // set server to stopping this.setInferenceServerStatus(server.container.containerId, 'stopping'); await containerEngine.stopContainer(server.container.engineId, server.container.containerId); // once stopped update the status this.setInferenceServerStatus(server.container.containerId, 'stopped'); } catch (error: unknown) { console.error(error); this.telemetry.logError('inference.stop', { message: 'error stopping inference', error: error, }); this.setInferenceServerStatus(server.container.containerId, 'error'); this.retryableRefresh(1); } } /** * Given an containerId, set the status of the corresponding inference server * @param containerId * @param status */ private setInferenceServerStatus(containerId: string, status: InferenceServerStatus): void { const server = this.#servers.get(containerId); if (server === undefined) throw new Error(`cannot find a corresponding server for container id ${containerId}.`); this.#servers.set(server.container.containerId, { ...server, status: status, health: undefined, // always reset health history when changing status }); this.notify(); } } ================================================ FILE: packages/backend/src/managers/instructlab/instructlabManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { TaskRegistry } from '../../registries/TaskRegistry'; import { beforeAll, beforeEach, expect, test, vi } from 'vitest'; import type { ContainerCreateResult, ContainerInfo, ImageInfo, TelemetryLogger } from '@podman-desktop/api'; import { containerEngine, EventEmitter } from '@podman-desktop/api'; import type { PodmanConnection } from '../podmanConnection'; import { INSTRUCTLAB_CONTAINER_LABEL, InstructlabManager } from './instructlabManager'; import { ContainerRegistry } from '../../registries/ContainerRegistry'; import { TestEventEmitter } from '../../tests/utils'; import { VMType } from '@shared/models/IPodman'; import type { Task } from '@shared/models/ITask'; import instructlab_images from '../../assets/instructlab-images.json'; import { INSTRUCTLAB_CONTAINER_TRACKINGID } from '@shared/models/instructlab/IInstructlabContainerInfo'; import type { RpcExtension } from '@shared/messages/MessageProxy'; vi.mock('@podman-desktop/api', () => { return { EventEmitter: vi.fn(), containerEngine: { listContainers: vi.fn(), listImages: vi.fn(), createContainer: vi.fn(), onEvent: vi.fn(), }, }; }); const taskRegistry = new TaskRegistry({ fire: vi.fn().mockResolvedValue(true) } as unknown as RpcExtension); const podmanConnection: PodmanConnection = { onPodmanConnectionEvent: vi.fn(), findRunningContainerProviderConnection: vi.fn(), } as unknown as PodmanConnection; const telemetryMock = { logUsage: vi.fn(), logError: vi.fn(), } as unknown as TelemetryLogger; let instructlabManager: InstructlabManager; beforeAll(() => { vi.mocked(EventEmitter).mockImplementation(() => new TestEventEmitter() as unknown as EventEmitter); }); beforeEach(() => { const containerRegistry = new ContainerRegistry(); containerRegistry.init(); instructlabManager = new 
InstructlabManager('', taskRegistry, podmanConnection, containerRegistry, telemetryMock); instructlabManager.init(); taskRegistry.deleteByLabels({ trackingId: INSTRUCTLAB_CONTAINER_TRACKINGID }); }); test('getInstructLabContainer should return undefined if no containers', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); const containerId = await instructlabManager.getInstructLabContainer(); expect(containerId).toBeUndefined(); }); test('getInstructLabContainer should return undefined if no instructlab container', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([{ Id: 'dummyId' } as unknown as ContainerInfo]); const containerId = await instructlabManager.getInstructLabContainer(); expect(containerId).toBeUndefined(); }); test('getInstructLabContainer should return id if instructlab container', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([ { Id: 'dummyId', State: 'running', Labels: { [`${INSTRUCTLAB_CONTAINER_LABEL}`]: 'dummyLabel' }, } as unknown as ContainerInfo, ]); const containerId = await instructlabManager.getInstructLabContainer(); expect(containerId).toBe('dummyId'); }); test('requestCreateInstructlabContainer throws error if no podman connection', async () => { const containerIdPromise = instructlabManager.requestCreateInstructlabContainer({}); await expect(containerIdPromise).rejects.toBeInstanceOf(Error); }); async function waitTasks(id: string, nb: number): Promise { return vi.waitFor(() => { const tasks = taskRegistry.getTasksByLabels({ trackingId: id }); if (tasks.length !== nb) { throw new Error('not completed'); } return tasks; }); } test('requestCreateInstructlabContainer returns id and error if listImage returns error', async () => { vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); 
vi.mocked(containerEngine.listImages).mockRejectedValue(new Error()); await instructlabManager.requestCreateInstructlabContainer({}); const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 2); expect(tasks.some(task => task.state === 'error')).toBeTruthy(); }); test('requestCreateInstructlabContainer returns id and error if listImage returns image', async () => { vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [instructlab_images.default] } as unknown as ImageInfo, ]); await instructlabManager.requestCreateInstructlabContainer({}); const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 3); expect(tasks.some(task => task.state === 'error')).toBeTruthy(); }); test('requestCreateInstructlabContainer returns id and no error if createContainer returns id', async () => { vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [instructlab_images.default] } as unknown as ImageInfo, ]); vi.mocked(containerEngine.createContainer).mockResolvedValue({ id: 'containerId', } as unknown as ContainerCreateResult); await instructlabManager.requestCreateInstructlabContainer({}); const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 3); expect(tasks.some(task => task.state === 'error')).toBeFalsy(); }); ================================================ FILE: packages/backend/src/managers/instructlab/instructlabManager.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { InstructlabSession } from '@shared/models/instructlab/IInstructlabSession'; import type { InstructlabContainerConfiguration } from '@shared/models/instructlab/IInstructlabContainerConfiguration'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { type TelemetryLogger, containerEngine, type ContainerProviderConnection, type ContainerCreateOptions, type Disposable, } from '@podman-desktop/api'; import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection'; import instructlab_images from '../../assets/instructlab-images.json'; import { getImageInfo } from '../../utils/inferenceUtils'; import path from 'node:path'; import fs from 'node:fs/promises'; import type { ContainerRegistry, ContainerEvent } from '../../registries/ContainerRegistry'; import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils'; import { INSTRUCTLAB_CONTAINER_TRACKINGID } from '@shared/models/instructlab/IInstructlabContainerInfo'; import { getRandomName } from '../../utils/randomUtils'; export const INSTRUCTLAB_CONTAINER_LABEL = 'ai-lab-instructlab-container'; export class InstructlabManager implements Disposable { #initialized: boolean; #containerId: string | undefined; #disposables: Disposable[]; constructor( private readonly appUserDirectory: string, private taskRegistry: 
TaskRegistry, private podmanConnection: PodmanConnection, private containerRegistry: ContainerRegistry, private telemetryLogger: TelemetryLogger, ) { this.#initialized = false; this.#disposables = []; } init(): void { this.#disposables.push(this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this))); this.#disposables.push(this.containerRegistry.onStartContainerEvent(this.onStartContainerEvent.bind(this))); this.#disposables.push(this.containerRegistry.onStopContainerEvent(this.onStopContainerEvent.bind(this))); } dispose(): void { this.#disposables.forEach(disposable => disposable.dispose()); this.#disposables = []; } private async refreshInstructlabContainer(id?: string): Promise { const containers = await containerEngine.listContainers(); const containerId = (this.#containerId = containers .filter(c => !id || c.Id === id) .filter(c => c.State === 'running' && c.Labels && INSTRUCTLAB_CONTAINER_LABEL in c.Labels) .map(c => c.Id) .at(0)); if ((id && containerId) || !id) { this.#containerId = containerId; } } private async watchMachineEvent(event: PodmanConnectionEvent): Promise { if ((event.status === 'started' && !this.#containerId) || (event.status === 'stopped' && this.#containerId)) { await this.refreshInstructlabContainer(); } } private async onStartContainerEvent(event: ContainerEvent): Promise { await this.refreshInstructlabContainer(event.id); } private onStopContainerEvent(event: ContainerEvent): void { console.log('event id:', event.id, ' containerId: ', this.#containerId); if (this.#containerId === event.id) { this.#containerId = undefined; this.taskRegistry.deleteByLabels({ trackingId: INSTRUCTLAB_CONTAINER_TRACKINGID }); } } public getSessions(): InstructlabSession[] { return [ { name: 'session 1', modelId: 'hf.facebook.detr-resnet-101', targetModel: 'hf.facebook.detr-resnet-101-target', repository: '/a1', status: 'fine-tuned', createdTime: new Date(new Date().getTime() - 6 * 24 * 60 * 60 * 1000).getTime() / 1000, // 6 days ago }, 
{ name: 'session 2', modelId: 'hf.ibm-granite.granite-8b-code-instruct', targetModel: 'hf.ibm-granite.granite-8b-code-instruct-target', repository: '/a2', status: 'generating-instructions', createdTime: new Date(new Date().getTime() - 4 * 60 * 60 * 1000).getTime() / 1000, // 4 hours ago }, ]; } async getInstructLabContainer(): Promise { if (!this.#initialized) { const containers = await containerEngine.listContainers(); this.#containerId = containers .filter(c => c.State === 'running' && c.Labels && INSTRUCTLAB_CONTAINER_LABEL in c.Labels) .map(c => c.Id) .at(0); this.#initialized = true; } return this.#containerId; } async requestCreateInstructlabContainer(config: InstructlabContainerConfiguration): Promise { // create a tracking id to put in the labels const trackingId: string = INSTRUCTLAB_CONTAINER_TRACKINGID; const labels = { trackingId: trackingId, }; const task = this.taskRegistry.createTask('Creating InstructLab container', 'loading', { trackingId: trackingId, }); let connection: ContainerProviderConnection | undefined; if (config.connection) { connection = this.podmanConnection.getContainerProviderConnection(config.connection); } else { connection = this.podmanConnection.findRunningContainerProviderConnection(); } if (!connection) throw new Error('cannot find running container provider connection'); this.createInstructlabContainer(connection, labels) .then((containerId: string) => { this.#containerId = containerId; this.taskRegistry.updateTask({ ...task, state: 'success', labels: { ...task.labels, containerId: containerId, }, }); this.telemetryLogger.logUsage('instructlab.startContainer'); }) .catch((err: unknown) => { // Get all tasks using the tracker const tasks = this.taskRegistry.getTasksByLabels({ trackingId: trackingId, }); // Filter the one no in loading state tasks .filter(t => t.state === 'loading' && t.id !== task.id) .forEach(t => { this.taskRegistry.updateTask({ ...t, state: 'error', }); }); // Update the main task 
this.taskRegistry.updateTask({ ...task, state: 'error', error: `Something went wrong while trying to create an inference server ${String(err)}.`, }); this.telemetryLogger.logError('instructlab.startContainer', { error: err }); }); } async createInstructlabContainer( connection: ContainerProviderConnection, labels: { [p: string]: string }, ): Promise { const image = instructlab_images.default; const pullingTask = this.taskRegistry.createTask(`Pulling ${image}.`, 'loading', labels); const imageInfo = await getImageInfo(connection, image, () => {}) .catch((err: unknown) => { pullingTask.state = 'error'; pullingTask.progress = undefined; pullingTask.error = `Something went wrong while pulling ${image}: ${String(err)}`; throw err; }) .then(imageInfo => { pullingTask.state = 'success'; pullingTask.progress = undefined; return imageInfo; }) .finally(() => { this.taskRegistry.updateTask(pullingTask); }); const folder = await this.getInstructLabContainerFolder(); const containerTask = this.taskRegistry.createTask('Starting InstructLab container', 'loading', labels); const createContainerOptions: ContainerCreateOptions = { Image: imageInfo.Id, name: getRandomName('instructlab'), Labels: { [INSTRUCTLAB_CONTAINER_LABEL]: image }, HostConfig: { AutoRemove: true, SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION], Mounts: [ { Target: '/instructlab/.cache/instructlab', Source: path.join(folder, '.cache'), Type: 'bind', }, { Target: '/instructlab/.config/instructlab', Source: path.join(folder, '.config'), Type: 'bind', }, { Target: '/instructlab/.local/share/instructlab', Source: path.join(folder, '.local'), Type: 'bind', }, ], UsernsMode: 'keep-id:uid=1000,gid=1000', }, OpenStdin: true, start: true, }; try { const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions); // update the task containerTask.state = 'success'; containerTask.progress = undefined; return id; } catch (err: unknown) { containerTask.state = 'error'; 
containerTask.progress = undefined; containerTask.error = `Something went wrong while creating container: ${String(err)}`; throw err; } finally { this.taskRegistry.updateTask(containerTask); } } private async getInstructLabContainerFolder(): Promise { const instructlabPath = path.join(this.appUserDirectory, 'instructlab', 'container'); await fs.mkdir(instructlabPath, { recursive: true }); await fs.mkdir(path.join(instructlabPath, '.cache'), { recursive: true }); await fs.mkdir(path.join(instructlabPath, '.config'), { recursive: true }); await fs.mkdir(path.join(instructlabPath, '.local'), { recursive: true }); return instructlabPath; } } ================================================ FILE: packages/backend/src/managers/llama-stack/llamaStackManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { TaskRegistry } from '../../registries/TaskRegistry'; import { assert, beforeEach, expect, test, vi } from 'vitest'; import type { ContainerCreateResult, ContainerInfo, Disposable, ImageInfo, TelemetryLogger } from '@podman-desktop/api'; import { containerEngine } from '@podman-desktop/api'; import type { PodmanConnection } from '../podmanConnection'; import type { ContainerRegistry } from '../../registries/ContainerRegistry'; import { VMType } from '@shared/models/IPodman'; import type { Task } from '@shared/models/ITask'; import llama_stack_images from '../../assets/llama-stack-images.json'; import llama_stack_playground_images from '../../assets/llama-stack-playground-images.json'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { LLAMA_STACK_API_PORT_LABEL, LLAMA_STACK_CONTAINER_LABEL, LLAMA_STACK_PLAYGROUND_PORT_LABEL, LlamaStackManager, } from './llamaStackManager'; import { LLAMA_STACK_CONTAINER_TRACKINGID, type LlamaStackContainers, } from '@shared/models/llama-stack/LlamaStackContainerInfo'; import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry'; import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration'; import type { ModelsManager } from '../modelsManager'; import * as utilsPorts from '../../utils/ports'; vi.mock('@podman-desktop/api', () => { return { EventEmitter: vi.fn(), containerEngine: { listContainers: vi.fn(), listImages: vi.fn(), createContainer: vi.fn(), onEvent: vi.fn(), pullImage: vi.fn(), inspectContainer: vi.fn(), startContainer: vi.fn(), stopContainer: vi.fn(), deleteContainer: vi.fn(), }, env: { isWindows: false, }, }; }); vi.mock('../../utils/ports'); class TestLlamaStackManager extends LlamaStackManager { public override async refreshLlamaStackContainers(): Promise { return super.refreshLlamaStackContainers(); } public override 
getContainersInfo(): LlamaStackContainers | undefined { return super.getContainersInfo(); } } const podmanConnection: PodmanConnection = { onPodmanConnectionEvent: vi.fn(), findRunningContainerProviderConnection: vi.fn(), execute: vi.fn(), } as unknown as PodmanConnection; const containerRegistry = { onStartContainerEvent: vi.fn(), onStopContainerEvent: vi.fn(), onHealthyContainerEvent: vi.fn(), } as unknown as ContainerRegistry; const configurationRegistry = { getExtensionConfiguration: vi.fn(), } as unknown as ConfigurationRegistry; const telemetryMock = { logUsage: vi.fn(), logError: vi.fn(), } as unknown as TelemetryLogger; const modelsManagerMock = { getModelsInfo: vi.fn(), } as unknown as ModelsManager; let taskRegistry: TaskRegistry; let llamaStackManager: TestLlamaStackManager; const LLAMA_STACK_CONTAINER_RUNNING = { Id: 'dummyId', State: 'running', Labels: { [LLAMA_STACK_CONTAINER_LABEL]: 'dummyLabel', [LLAMA_STACK_API_PORT_LABEL]: '50000', }, } as unknown as ContainerInfo; const LLAMA_STACK_CONTAINER_STOPPED = { Id: 'dummyId', State: 'stopped', } as unknown as ContainerInfo; const NON_LLAMA_STACK_CONTAINER = { Id: 'dummyId' } as unknown as ContainerInfo; const NO_OP_DISPOSABLE = { dispose: (): void => {}, } as Disposable; beforeEach(() => { vi.resetAllMocks(); taskRegistry = new TaskRegistry({ fire: vi.fn().mockResolvedValue(true) } as unknown as RpcExtension); llamaStackManager = new TestLlamaStackManager( '', taskRegistry, podmanConnection, containerRegistry, configurationRegistry, telemetryMock, modelsManagerMock, ); }); test('getLlamaStackContainers should return undefined if no containers', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); const stack_containers = await llamaStackManager.getLlamaStackContainers(); expect(stack_containers).toEqual({ server: undefined, playground: undefined }); }); test('getLlamaStackContainers should return undefined if no llama stack container', async () => { 
vi.mocked(containerEngine.listContainers).mockResolvedValue([NON_LLAMA_STACK_CONTAINER]); const stack_containers = await llamaStackManager.getLlamaStackContainers(); expect(stack_containers).toEqual({ server: undefined, playground: undefined }); }); test('getLlamaStackContainers should return server info if llama stack server container', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([LLAMA_STACK_CONTAINER_RUNNING]); const containerInfo = await llamaStackManager.getLlamaStackContainers(); expect(containerInfo).toEqual({ server: { containerId: 'dummyId', port: 50000, state: 'running' }, playground: undefined, }); }); test('requestcreateLlamaStackContainerss throws error if no podman connection', async () => { const containerIdPromise = llamaStackManager.requestcreateLlamaStackContainerss({}); await expect(containerIdPromise).rejects.toBeInstanceOf(Error); }); async function waitTasks(id: string, nb: number): Promise { return vi.waitFor(() => { const tasks = taskRegistry.getTasksByLabels({ trackingId: id }); if (tasks.length < nb) { throw new Error('not completed'); } return tasks.slice(0, nb); }); } test('requestcreateLlamaStackContainerss returns id and error if listImage returns error', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockRejectedValue(new Error()); await llamaStackManager.requestcreateLlamaStackContainerss({}); const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 2); expect(tasks.some(task => task.state === 'error')).toBeTruthy(); }); test('requestcreateLlamaStackContainerss returns id and error if listImage returns image', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); 
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [llama_stack_images.default] } as unknown as ImageInfo, ]); vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ apiPort: 10000, } as ExtensionConfiguration); await llamaStackManager.requestcreateLlamaStackContainerss({}); const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 3); expect(tasks.some(task => task.state === 'error')).toBeTruthy(); }); test('requestcreateLlamaStackContainerss returns no error if createContainer returns id and container becomes healthy', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [llama_stack_images.default, llama_stack_playground_images.default], Id: 'imageId', engineId: 'engine1', } as unknown as ImageInfo, ]); vi.mocked(containerEngine.createContainer).mockResolvedValue({ id: 'containerId', } as unknown as ContainerCreateResult); vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ apiPort: 10000, } as ExtensionConfiguration); vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678); vi.mocked(containerEngine.pullImage).mockResolvedValue(); vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([]); vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' }); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => { // Fire the callback immediately for testing setTimeout(() 
=> cb({ id: 'containerId' }), 100); return NO_OP_DISPOSABLE; }); await llamaStackManager.requestcreateLlamaStackContainerss({}); const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 4); expect(tasks.some(task => task.state === 'error')).toBeFalsy(); }); test('requestcreateLlamaStackContainerss registers all local models', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [llama_stack_images.default, llama_stack_playground_images.default] } as unknown as ImageInfo, ]); vi.mocked(containerEngine.createContainer).mockResolvedValue({ id: 'containerId', } as unknown as ContainerCreateResult); vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ apiPort: 10000, } as ExtensionConfiguration); vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678); vi.mocked(containerEngine.pullImage).mockResolvedValue(); vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' }); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => { setTimeout(() => cb({ id: 'containerId' }), 100); return NO_OP_DISPOSABLE; }); vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([ { id: 'model1', name: 'Model 1', description: '', file: { file: 'model1', path: '/path/to' }, }, { id: 'model2', name: 'Model 2', description: '', file: { file: 'model2', path: '/path/to' }, }, { id: 'model3', name: 'Model 3', description: '', }, ]); await llamaStackManager.requestcreateLlamaStackContainerss({}); const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 6); expect(tasks.some(task => task.state === 'error')).toBeFalsy(); await vi.waitFor(() => { 
expect(podmanConnection.execute).toHaveBeenCalledTimes(2); }); expect(podmanConnection.execute).toHaveBeenCalledWith(expect.anything(), [ 'exec', 'containerId', 'llama-stack-client', 'models', 'register', 'Model 1', '--provider-id', 'podman-ai-lab', ]); expect(podmanConnection.execute).toHaveBeenCalledWith(expect.anything(), [ 'exec', 'containerId', 'llama-stack-client', 'models', 'register', 'Model 2', '--provider-id', 'podman-ai-lab', ]); }); test('requestcreateLlamaStackContainerss creates playground container', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listImages).mockResolvedValue([ { RepoTags: [llama_stack_images.default, llama_stack_playground_images.default] } as unknown as ImageInfo, ]); vi.mocked(containerEngine.createContainer).mockResolvedValue({ id: 'containerId', } as unknown as ContainerCreateResult); vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ apiPort: 10000, } as ExtensionConfiguration); vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678); vi.mocked(containerEngine.pullImage).mockResolvedValue(); vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' }); vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => { setTimeout(() => cb({ id: 'containerId' }), 100); return NO_OP_DISPOSABLE; }); vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([ { id: 'model1', name: 'Model 1', description: '', file: { file: 'model1', path: '/path/to' }, }, { id: 'model2', name: 'Model 2', description: '', file: { file: 'model2', path: '/path/to' }, }, { id: 'model3', name: 'Model 3', description: '', }, ]); await 
llamaStackManager.requestcreateLlamaStackContainerss({}); const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 7); expect(tasks.some(task => task.state === 'error')).toBeFalsy(); expect(containerEngine.createContainer).toHaveBeenCalledTimes(2); expect(containerEngine.createContainer).toHaveBeenNthCalledWith( 2, undefined, expect.objectContaining({ Env: ['LLAMA_STACK_ENDPOINT=http://host.containers.internal:1234'], HostConfig: expect.objectContaining({ PortBindings: { '8501/tcp': [ { HostPort: '5678', }, ], }, }), }), ); }); test('requestcreateLlamaStackContainerss starts both if server and playground exist', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); const server = { Id: 'serverId', Labels: { [LLAMA_STACK_API_PORT_LABEL]: '50000' } } as unknown as ContainerInfo; const playground = { Id: 'playgroundId', Labels: { [LLAMA_STACK_PLAYGROUND_PORT_LABEL]: '60000' }, } as unknown as ContainerInfo; vi.mocked(containerEngine.listContainers).mockResolvedValue([server, playground]); const startBothSpy = vi .spyOn(llamaStackManager as unknown as { startBoth: () => Promise }, 'startBoth') .mockResolvedValue(undefined); await llamaStackManager.requestcreateLlamaStackContainerss({}); expect(startBothSpy).toHaveBeenCalledWith(server, playground, expect.any(Object)); }); test('requestcreateLlamaStackContainerss creates playground if server exists but playground missing', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); const server = { Id: 'serverId', Labels: { 
[LLAMA_STACK_API_PORT_LABEL]: '50000' } } as unknown as ContainerInfo; vi.mocked(containerEngine.listContainers).mockResolvedValue([server]); const createPlaygroundSpy = vi .spyOn( llamaStackManager as unknown as { createPlaygroundFromServer: () => Promise }, 'createPlaygroundFromServer', ) .mockResolvedValue(undefined); await llamaStackManager.requestcreateLlamaStackContainerss({}); expect(createPlaygroundSpy).toHaveBeenCalledWith(server, expect.any(Object), expect.anything()); }); test('requestcreateLlamaStackContainerss deletes existing playground and creates both if server missing', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); const playground = { Id: 'playgroundId', Labels: { [LLAMA_STACK_PLAYGROUND_PORT_LABEL]: '60000' }, } as unknown as ContainerInfo; vi.mocked(containerEngine.listContainers).mockResolvedValue([playground]); const createBothSpy = vi .spyOn(llamaStackManager as unknown as { createBoth: () => Promise }, 'createBoth') .mockResolvedValue(undefined); await llamaStackManager.requestcreateLlamaStackContainerss({}); expect(createBothSpy).toHaveBeenCalledWith(playground, expect.any(Object), expect.anything()); }); test('requestcreateLlamaStackContainerss creates both if server and playground missing', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({ name: 'Podman Machine', vmType: VMType.UNKNOWN, type: 'podman', status: () => 'started', endpoint: { socketPath: 'socket.sock', }, }); vi.mocked(containerEngine.listContainers).mockResolvedValue([]); const createBothSpy = vi .spyOn(llamaStackManager as unknown as { createBoth: () => Promise }, 'createBoth') .mockResolvedValue(undefined); await 
llamaStackManager.requestcreateLlamaStackContainerss({}); expect(createBothSpy).toHaveBeenCalledWith(undefined, expect.any(Object), expect.anything()); }); test('onPodmanConnectionEvent start event should call refreshLlamaStackContainers and set containerInfo', async () => { vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers'); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]); vi.mocked(podmanConnection.onPodmanConnectionEvent).mockImplementation(f => { f({ status: 'started', }); return NO_OP_DISPOSABLE; }); llamaStackManager.init(); expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith(); await vi.waitFor(() => { expect(llamaStackManager.getContainersInfo()).toEqual({ server: { containerId: 'dummyId', port: 50000, state: 'running' }, playground: undefined, }); }); }); test('onPodmanConnectionEvent stop event should call refreshLlamaStackContainers and clear containerInfo', async () => { vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers'); vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]); vi.mocked(podmanConnection.onPodmanConnectionEvent).mockReturnValue(NO_OP_DISPOSABLE); llamaStackManager.init(); const listener = vi.mocked(podmanConnection.onPodmanConnectionEvent).mock.calls[0][0]; assert(listener, 'onPodmanConnectionEvent should have been called'); listener({ status: 'started' }); expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith(); await vi.waitFor(() => { expect(llamaStackManager.getContainersInfo()).toEqual({ server: { containerId: 'dummyId', port: 50000, state: 'running' }, playground: undefined, }); }); vi.mocked(llamaStackManager.refreshLlamaStackContainers).mockClear(); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_STOPPED]); 
listener({ status: 'stopped' }); expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith(); await vi.waitFor(async () => { expect(llamaStackManager.getContainersInfo()).toEqual({ server: undefined, playground: undefined }); }); }); test('onStartContainerEvent event should call refreshLlamaStackContainers and set containerInfo', async () => { vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers'); vi.mocked(containerEngine.listContainers).mockResolvedValue([]); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]); vi.mocked(containerRegistry.onStartContainerEvent).mockImplementation(f => { f({ id: 'dummyId', }); return NO_OP_DISPOSABLE; }); llamaStackManager.init(); expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith(); await vi.waitFor(() => { expect(llamaStackManager.getContainersInfo()).toEqual({ server: { containerId: 'dummyId', port: 50000, state: 'running' }, playground: undefined, }); }); }); test('onStopContainerEvent event should call refreshLlamaStackContainers and clear containerInfo', async () => { vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers'); vi.spyOn(taskRegistry, 'deleteByLabels'); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]); vi.mocked(containerRegistry.onStartContainerEvent).mockImplementation(f => { f({ id: 'dummyId', }); return NO_OP_DISPOSABLE; }); vi.mocked(containerRegistry.onStopContainerEvent).mockReturnValue(NO_OP_DISPOSABLE); llamaStackManager.init(); expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith(); await vi.waitFor(() => { expect(llamaStackManager.getContainersInfo()).toEqual({ server: { containerId: 'dummyId', port: 50000, state: 'running' }, playground: undefined, }); }); vi.mocked(llamaStackManager.refreshLlamaStackContainers).mockClear(); vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_STOPPED]); const listener = 
vi.mocked(containerRegistry.onStopContainerEvent).mock.calls[0][0]; assert(listener, 'onStopContainerEvent should have been called'); listener({ id: 'dummyId' }); expect(taskRegistry.deleteByLabels).toHaveBeenCalled(); await vi.waitFor(async () => { expect(llamaStackManager.getContainersInfo()).toBeUndefined(); }); }); ================================================ FILE: packages/backend/src/managers/llama-stack/llamaStackManager.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { TaskRegistry } from '../../registries/TaskRegistry'; import { containerEngine, env, process, type ContainerInfo, type Disposable, type TelemetryLogger, type ContainerProviderConnection, type ContainerCreateOptions, type ImageInfo, } from '@podman-desktop/api'; import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection'; import llama_stack_images from '../../assets/llama-stack-images.json'; import llama_stack_playground_images from '../../assets/llama-stack-playground-images.json'; import { getImageInfo } from '../../utils/inferenceUtils'; import type { ContainerRegistry, ContainerEvent, ContainerHealthy } from '../../registries/ContainerRegistry'; import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils'; import { getRandomName } from '../../utils/randomUtils'; import type { LlamaStackContainerInfo, LlamaStackContainers } from '@shared/models/llama-stack/LlamaStackContainerInfo'; import { LLAMA_STACK_CONTAINER_TRACKINGID } from '@shared/models/llama-stack/LlamaStackContainerInfo'; import type { LlamaStackContainerConfiguration } from '@shared/models/llama-stack/LlamaStackContainerConfiguration'; import path from 'node:path'; import fs from 'node:fs/promises'; import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry'; import { getFreeRandomPort } from '../../utils/ports'; import { TaskRunner } from '../TaskRunner'; import type { ModelsManager } from '../modelsManager'; import { getPodmanCli, getPodmanMachineName } from '../../utils/podman'; export const LLAMA_STACK_CONTAINER_LABEL = 'ai-lab-llama-stack-container'; export const LLAMA_STACK_API_PORT_LABEL = 'ai-lab-llama-stack-api-port'; export const LLAMA_STACK_PLAYGROUND_PORT_LABEL = 'ai-lab-llama-stack-playground-port'; export const SECOND: number = 1_000_000_000; /* * Get the local IP address of the Podman machine. 
* See https://learn.microsoft.com/en-us/windows/wsl/networking */ async function getLocalIPAddress(connection: ContainerProviderConnection): Promise { const cli = getPodmanCli(); const machineName = getPodmanMachineName(connection); const result = await process.exec(cli, [ 'machine', 'ssh', machineName, 'ip', 'route', 'show', '|', 'grep', '-i', 'default', '|', 'awk', // eslint-disable-next-line quotes "'{print $3}'", ]); return result.stdout.trim(); } export class LlamaStackManager implements Disposable { #initialized: boolean; #stack_containers: LlamaStackContainers | undefined; #creationInProgress = false; #disposables: Disposable[]; #taskRunner: TaskRunner; constructor( private readonly appUserDirectory: string, private taskRegistry: TaskRegistry, private podmanConnection: PodmanConnection, private containerRegistry: ContainerRegistry, private configurationRegistry: ConfigurationRegistry, private telemetryLogger: TelemetryLogger, private modelsManager: ModelsManager, ) { this.#initialized = false; this.#disposables = []; this.#taskRunner = new TaskRunner(this.taskRegistry); } init(): void { this.#disposables.push(this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this))); this.#disposables.push(this.containerRegistry.onStartContainerEvent(this.onStartContainerEvent.bind(this))); this.#disposables.push(this.containerRegistry.onStopContainerEvent(this.onStopContainerEvent.bind(this))); } dispose(): void { this.#disposables.forEach(disposable => disposable.dispose()); this.#disposables = []; } private async watchMachineEvent(event: PodmanConnectionEvent): Promise { if ( (event.status === 'started' && (!this.#stack_containers?.server || !this.#stack_containers?.playground)) || (event.status === 'stopped' && (this.#stack_containers?.server || this.#stack_containers?.playground)) ) { await this.refreshLlamaStackContainers(); } } private async onStartContainerEvent(): Promise { await this.refreshLlamaStackContainers(); } private async 
onStopContainerEvent(event: ContainerEvent): Promise { const serverId = this.#stack_containers?.server?.containerId; const playgroundId = this.#stack_containers?.playground?.containerId; if (this.#creationInProgress) return; if (serverId === event.id || playgroundId === event.id) { this.#stack_containers = undefined; this.taskRegistry.deleteByLabels({ trackingId: LLAMA_STACK_CONTAINER_TRACKINGID }); } await this.refreshLlamaStackContainers(); } /** * getLlamaStackContainers returns the first running container with a Llama Stack label. * The container is searched only the first time and the result is cached for subsequent calls. * * Returns undefined if no container is found */ async getLlamaStackContainers(): Promise { if (!this.#initialized) { await this.refreshLlamaStackContainers(); this.#initialized = true; } return this.#stack_containers; } /** * refreshLlamaStackContainers refreshes the container info. * It is called when the machine is started or when a container is stopped. */ protected async refreshLlamaStackContainers(): Promise { const containers = await containerEngine.listContainers(); const serverContainer = containers.find(c => c.Labels && LLAMA_STACK_API_PORT_LABEL in c.Labels); let serverInfo: LlamaStackContainerInfo | undefined; if (serverContainer) { serverInfo = { containerId: serverContainer.Id, port: parseInt(serverContainer.Labels[LLAMA_STACK_API_PORT_LABEL], 10), state: serverContainer.State, }; } const playgroundContainer = containers.find(c => c.Labels && LLAMA_STACK_PLAYGROUND_PORT_LABEL in c.Labels); let playgroundInfo: LlamaStackContainerInfo | undefined; if (playgroundContainer) { playgroundInfo = { containerId: playgroundContainer.Id, port: parseInt(playgroundContainer.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10), state: playgroundContainer.State, }; } this.#stack_containers = { server: serverInfo, playground: playgroundInfo, }; } /** * requestcreateLlamaStackContainerss creates the Llama Stack containers. 
* It is called when the user clicks the "Start" button. * * Flowchart for checking containers and handling them: * * Server exists * ├─ Playground exists * │ └─ Start both * └─ Playground doesn't exist * └─ Create new playground * * Server doesn't exist * ├─ Playground exists * │ └─ Delete playground and update state * └─ Playground doesn't exist * └─ Create both */ async requestcreateLlamaStackContainerss(config: LlamaStackContainerConfiguration): Promise { const connection: ContainerProviderConnection | undefined = config.connection ? this.podmanConnection.getContainerProviderConnection(config.connection) : this.podmanConnection.findRunningContainerProviderConnection(); if (!connection) throw new Error('Cannot find running container provider connection'); const labels = { trackingId: LLAMA_STACK_CONTAINER_TRACKINGID }; const containers = await containerEngine.listContainers(); const server = containers.find(c => c.Labels && LLAMA_STACK_API_PORT_LABEL in c.Labels); const playground = containers.find(c => c.Labels && LLAMA_STACK_PLAYGROUND_PORT_LABEL in c.Labels); try { if (server) { if (playground) { await this.startBoth(server, playground, labels); } else { await this.createPlaygroundFromServer(server, labels, connection); } } else { this.#creationInProgress = true; await this.createBoth(playground, labels, connection); this.#creationInProgress = false; } } catch (err) { this.telemetryLogger.logError('llamaStack.startContainer', { error: err }); } } /** * Helper: Both server and playground exist → start both */ private async startBoth( server: ContainerInfo, playground: ContainerInfo, labels: { [p: string]: string }, ): Promise { await this.#taskRunner.runAsTask( labels, { loadingLabel: 'Starting Server and/or Playground', errorMsg: err => `Failed to start existing containers: ${String(err)}`, }, async ({ updateLabels }) => { if (server.State !== 'running') await containerEngine.startContainer(server.engineId, server.Id); if (playground.State !== 'running') await 
containerEngine.startContainer(playground.engineId, playground.Id); const serverInfo = await this.waitLlamaStackServerHealthy( { containerId: server.Id, port: parseInt(server.Labels[LLAMA_STACK_API_PORT_LABEL], 10), state: server.State, }, labels, ); this.#stack_containers = { server: serverInfo, playground: { containerId: playground.Id, port: parseInt(playground.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10), state: 'running', }, }; updateLabels(l => ({ ...l, containerId: serverInfo.containerId, port: `${serverInfo.port}`, state: serverInfo.state, playgroundId: playground.Id, playgroundPort: `${parseInt(playground.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10)}`, playgroundState: 'running', })); this.telemetryLogger.logUsage('llamaStack.startContainer'); }, ); } /** * Helper: Only server exists → create playground */ private async createPlaygroundFromServer( server: ContainerInfo, labels: { [p: string]: string }, connection: ContainerProviderConnection, ): Promise { await this.#taskRunner.runAsTask( labels, { loadingLabel: 'Creating Playground container', errorMsg: err => `Failed to create playground: ${String(err)}`, }, async ({ updateLabels }) => { if (server.State !== 'running') await containerEngine.startContainer(server.engineId, server.Id); const serverInfo = await this.waitLlamaStackServerHealthy( { containerId: server.Id, port: parseInt(server.Labels[LLAMA_STACK_API_PORT_LABEL], 10), state: server.State, }, labels, ); const playgroundInfo = await this.createPlaygroundContainer(serverInfo, labels, connection); this.#stack_containers = { server: serverInfo, playground: playgroundInfo }; updateLabels(l => ({ ...l, containerId: serverInfo.containerId, port: `${serverInfo.port}`, state: serverInfo.state, playgroundId: playgroundInfo.containerId, playgroundPort: `${playgroundInfo.port}`, playgroundState: playgroundInfo.state, })); this.telemetryLogger.logUsage('llamaStack.startContainer'); }, ); } /** * Helper: Only playground exists → delete it and create both 
containers */ private async createBoth( playground: ContainerInfo | undefined, labels: { [p: string]: string }, connection: ContainerProviderConnection, ): Promise { await this.#taskRunner.runAsTask( labels, { loadingLabel: 'Creating Server and Playground', errorMsg: err => `Failed to create Llama Stack containers: ${String(err)}`, failFastSubtasks: true, }, async ({ updateLabels }) => { // If playground exists, stop & delete it if (playground) { if (playground.State === 'running') { await containerEngine.stopContainer(playground.engineId, playground.Id); } await containerEngine.deleteContainer(playground.engineId, playground.Id); } // Create new server + playground const stackInfo = await this.createLlamaStackContainers(connection, labels); this.#stack_containers = stackInfo; // Update task labels for UI updateLabels(l => ({ ...l, containerId: stackInfo.server?.containerId ?? '', port: `${stackInfo.server?.port}`, state: stackInfo.server?.state ?? '', playgroundId: stackInfo.playground?.containerId ?? '', playgroundPort: `${stackInfo.playground?.port}`, playgroundState: stackInfo.playground?.state ?? 
'', })); this.telemetryLogger.logUsage('llamaStack.startContainer'); }, ); } async createLlamaStackContainers( connection: ContainerProviderConnection, labels: { [p: string]: string }, ): Promise { const image = llama_stack_images.default; const imageInfo = await this.#taskRunner.runAsTask( labels, { loadingLabel: `Pulling ${image}.`, errorMsg: err => `Something went wrong while pulling ${image}: ${String(err)}`, }, () => getImageInfo(connection, image, () => {}), ); // Create the server container let serverInfo = await this.createServerContainer(connection, image, imageInfo, labels); serverInfo = await this.waitLlamaStackServerHealthy(serverInfo, labels); serverInfo = await this.registerModels(serverInfo, labels, connection); const playgroundInfo = await this.createPlaygroundContainer(serverInfo, labels, connection); // Return both in proper interface return { server: serverInfo, playground: playgroundInfo, }; } private async createServerContainer( connection: ContainerProviderConnection, image: string, imageInfo: ImageInfo, labels: { [p: string]: string }, ): Promise { const folder = await this.getLlamaStackContainersFolder(); const aiLabApiHost = env.isWindows && connection.vmType === 'wsl' ? 
await getLocalIPAddress(connection) : 'host.docker.internal'; const aiLabApiPort = this.configurationRegistry.getExtensionConfiguration().apiPort; const llamaStackApiPort = await getFreeRandomPort('0.0.0.0'); const createContainerOptions: ContainerCreateOptions = { Image: imageInfo.Id, name: getRandomName('llama-stack'), Labels: { [LLAMA_STACK_CONTAINER_LABEL]: image, [LLAMA_STACK_API_PORT_LABEL]: `${llamaStackApiPort}`, }, HostConfig: { AutoRemove: false, SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION], Mounts: [ { Target: '/app/.llama', Source: path.join(folder, '.llama'), Type: 'bind', }, ], UsernsMode: 'keep-id:uid=0,gid=0', PortBindings: { '8321/tcp': [{ HostPort: `${llamaStackApiPort}` }] }, }, Env: [`PODMAN_AI_LAB_URL=http://${aiLabApiHost}:${aiLabApiPort}`], OpenStdin: true, start: true, HealthCheck: { Test: ['CMD-SHELL', `curl -sSf localhost:8321/v1/models > /dev/null`], Interval: SECOND * 5, Retries: 20, }, }; return this.#taskRunner.runAsTask( labels, { loadingLabel: 'Starting Llama Stack server container', errorMsg: err => `Something went wrong while creating server container: ${String(err)}`, }, async () => { const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions); return { containerId: id, port: llamaStackApiPort, state: 'starting', }; }, ); } async waitLlamaStackServerHealthy( serverInfo: LlamaStackContainerInfo, labels: { [p: string]: string }, ): Promise { return this.#taskRunner.runAsTask( labels, { loadingLabel: 'Waiting for Llama Stack server to be healthy', errorMsg: err => `Something went wrong while checking server health: ${String(err)}`, }, () => new Promise((resolve, _reject) => { const disposable = this.containerRegistry.onHealthyContainerEvent((event: ContainerHealthy) => { if (event.id !== serverInfo.containerId) return; disposable.dispose(); serverInfo.state = 'running'; this.telemetryLogger.logUsage('llamaStack.startContainer'); resolve(serverInfo); }); }), ); } async registerModels( 
serverInfo: LlamaStackContainerInfo, labels: { [p: string]: string }, connection: ContainerProviderConnection, ): Promise { for (const model of this.modelsManager.getModelsInfo().filter(model => model.file)) { await this.#taskRunner.runAsTask( labels, { loadingLabel: `Registering model ${model.name}`, errorMsg: err => `Something went wrong while registering model: ${String(err)}`, }, async () => { await this.podmanConnection.execute(connection, [ 'exec', serverInfo.containerId, 'llama-stack-client', 'models', 'register', model.name, '--provider-id', 'podman-ai-lab', ]); }, ); } return serverInfo; } private async createPlaygroundContainer( serverInfo: LlamaStackContainerInfo, labels: { [p: string]: string }, connection: ContainerProviderConnection, ): Promise { const image = llama_stack_playground_images.default; const imageInfo = await this.#taskRunner.runAsTask( labels, { loadingLabel: `Pulling ${image}.`, errorMsg: err => `Something went wrong while pulling ${image}: ${String(err)}`, }, () => getImageInfo(connection, image, () => {}), ); const playgroundPort = await getFreeRandomPort('0.0.0.0'); const createContainerOptions: ContainerCreateOptions = { Image: imageInfo.Id, name: getRandomName('llama-stack-playground'), Labels: { [LLAMA_STACK_CONTAINER_LABEL]: image, [LLAMA_STACK_PLAYGROUND_PORT_LABEL]: `${playgroundPort}`, }, HostConfig: { AutoRemove: false, PortBindings: { '8501/tcp': [{ HostPort: `${playgroundPort}` }] }, }, Env: [`LLAMA_STACK_ENDPOINT=http://host.containers.internal:${serverInfo.port}`], OpenStdin: true, start: true, }; return this.#taskRunner.runAsTask( labels, { loadingLabel: 'Starting Llama Stack Playground container', errorMsg: err => `Something went wrong while creating playground container: ${String(err)}`, }, async () => { const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions); return { containerId: id, port: playgroundPort, state: 'running', }; }, ); } private async 
getLlamaStackContainersFolder(): Promise { const llamaStackPath = path.join(this.appUserDirectory, 'llama-stack', 'container'); await fs.mkdir(path.join(llamaStackPath, '.llama'), { recursive: true }); return llamaStackPath; } // For tests only protected getContainersInfo(): LlamaStackContainers | undefined { return this.#stack_containers; } } ================================================ FILE: packages/backend/src/managers/modelsManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/
// Unit tests for ModelsManager. The suite mocks the Podman Desktop API, the
// downloader/uploader utilities and @huggingface/gguf so that no test touches
// the real filesystem, a real podman machine, or the network.
import { type MockInstance, beforeEach, describe, expect, test, vi } from 'vitest'; import os from 'node:os'; import fs, { type Stats, type PathLike } from 'node:fs'; import path from 'node:path'; import { ModelsManager } from './modelsManager'; import { env, process as coreProcess } from '@podman-desktop/api'; import type { RunResult, TelemetryLogger, ContainerProviderConnection } from '@podman-desktop/api'; import type { CatalogManager } from './catalogManager'; import type { ModelInfo } from '@shared/models/IModelInfo'; import * as utils from '../utils/utils'; import { TaskRegistry } from '../registries/TaskRegistry'; import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry'; import * as sha from '../utils/sha'; import type { GGUFParseOutput } from '@huggingface/gguf'; import { gguf } from '@huggingface/gguf'; import type { PodmanConnection } from './podmanConnection'; import { VMType } from '@shared/models/IPodman'; import { getPodmanMachineName } from '../utils/podman'; import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry'; import { Uploader } from '../utils/uploader'; import { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry'; import { URLModelHandler } from '../models/URLModelHandler'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_NEW_MODELS_STATE } from '@shared/Messages';
// Spies shared by the vi.mock factories below; vi.hoisted is required because
// vi.mock calls are hoisted above the imports by vitest.
const mocks = vi.hoisted(() => { return { showErrorMessageMock: vi.fn(), logUsageMock: vi.fn(), logErrorMock: vi.fn(), performDownloadMock: vi.fn(), onEventDownloadMock: vi.fn(), getTargetMock: vi.fn(), getDownloaderCompleter: vi.fn(), isCompletionEventMock: vi.fn(), getPodmanCliMock: vi.fn(), }; }); vi.mock('../utils/uploader', () => ({ Uploader: vi.fn(), })); vi.mock('@huggingface/gguf', () => ({ gguf: vi.fn(), })); vi.mock('../utils/podman', () => ({ getPodmanCli:
mocks.getPodmanCliMock, getPodmanMachineName: vi.fn(), }));
// Minimal stand-in for the @podman-desktop/api module surface used by ModelsManager.
vi.mock('@podman-desktop/api', () => { return { Disposable: { create: vi.fn(), }, env: { isWindows: false, }, process: { exec: vi.fn(), }, fs: { createFileSystemWatcher: (): unknown => ({ onDidCreate: vi.fn(), onDidDelete: vi.fn(), onDidChange: vi.fn(), }), }, window: { showErrorMessage: mocks.showErrorMessageMock, }, EventEmitter: vi.fn(), }; });
// Stub Downloader class whose completion/target behavior is driven by the hoisted spies.
vi.mock('../utils/downloader', () => ({ isCompletionEvent: mocks.isCompletionEventMock, Downloader: class { get completed(): boolean { return mocks.getDownloaderCompleter(); } onEvent = mocks.onEventDownloadMock; perform = mocks.performDownloadMock; getTarget = mocks.getTargetMock; }, }));
// Collaborator doubles shared by every test; taskRegistry/modelHandlerRegistry
// are rebuilt per test in beforeEach.
const podmanConnectionMock = { getContainerProviderConnections: vi.fn(), } as unknown as PodmanConnection; const cancellationTokenRegistryMock = { createCancellationTokenSource: vi.fn(), } as unknown as CancellationTokenRegistry; let taskRegistry: TaskRegistry; const telemetryLogger = { logUsage: mocks.logUsageMock, logError: mocks.logErrorMock, } as unknown as TelemetryLogger; const configurationRegistryMock: ConfigurationRegistry = { getExtensionConfiguration: vi.fn(), } as unknown as ConfigurationRegistry; let modelHandlerRegistry: ModelHandlerRegistry; const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension;
// Reset all spies and rebuild fresh registries with a default configuration before each test.
beforeEach(() => { vi.resetAllMocks(); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); taskRegistry = new TaskRegistry(rpcExtensionMock); modelHandlerRegistry = new ModelHandlerRegistry(rpcExtensionMock); vi.mocked(configurationRegistryMock.getExtensionConfiguration).mockReturnValue({ modelUploadDisabled: false, modelsPath: '~/downloads', experimentalTuning: false, apiPort: 0, inferenceRuntime: 'llama-cpp', experimentalGPU: false, showGPUPromotion: false, appearance: 'dark', }); mocks.isCompletionEventMock.mockReturnValue(true); });
// Fake directory listing: two model folders plus a plain file that scanners must ignore.
const dirent = [ { isDirectory: (): boolean => true, parentPath: '/home/user/appstudio-dir', name: 'model-id-1',
}, { isDirectory: (): boolean => true, parentPath: '/home/user/appstudio-dir', name: 'model-id-2', }, { isDirectory: (): boolean => false, parentPath: '/home/user/appstudio-dir', name: 'other-file-should-be-ignored.txt', }, ] as fs.Dirent[];
// Installs fs spies simulating a models directory containing the `dirent` entries above.
function mockFiles(now: Date): void { vi.spyOn(os, 'homedir').mockReturnValue('/home/user'); const existsSyncSpy = vi.spyOn(fs, 'existsSync'); existsSyncSpy.mockImplementation((path: PathLike) => { if (process.platform === 'win32') { expect(path).toBe('C:\\home\\user\\aistudio\\models'); } else { expect(path).toBe('/home/user/aistudio/models'); } return true; }); const statSpy = vi.spyOn(fs.promises, 'stat'); const info: Stats = {} as Stats; info.size = 32000; info.mtime = now; statSpy.mockResolvedValue(info); const readdirMock = vi.spyOn(fs.promises, 'readdir') as unknown as MockInstance< (path: string) => Promise >; readdirMock.mockImplementation((dir: string) => { if (dir.endsWith('model-id-1') || dir.endsWith('model-id-2')) { const base = path.basename(dir); return Promise.resolve([base + '-model']); } else { return Promise.resolve(dirent); } }); }
test('getModelsInfo should get models in local directory', async () => { const now = new Date(); mockFiles(now); let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return [ { id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo, { id: 'model-id-2', name: 'model-id-2-model' } as ModelInfo, ]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); expect(manager.getModelsInfo()).toEqual([ { id: 'model-id-1', name:
'model-id-1-model', file: { size: 32000, creation: now, path: path.resolve(dirent[0].parentPath, dirent[0].name), file: 'model-id-1-model', }, }, { id: 'model-id-2', name: 'model-id-2-model', file: { size: 32000, creation: now, path: path.resolve(dirent[1].parentPath, dirent[1].name), file: 'model-id-2-model', }, }, ]); });
test('getModelsInfo should return an empty array if the models folder does not exist', async () => { vi.spyOn(os, 'homedir').mockReturnValue('/home/user'); const existsSyncSpy = vi.spyOn(fs, 'existsSync'); existsSyncSpy.mockReturnValue(false); let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.getLocalModelsFromDisk(); expect(manager.getModelsInfo()).toEqual([]); if (process.platform === 'win32') { expect(existsSyncSpy).toHaveBeenCalledWith('C:\\home\\user\\aistudio\\models'); } else { expect(existsSyncSpy).toHaveBeenCalledWith('/home/user/aistudio/models'); } });
test('getLocalModelsFromDisk should return undefined Date and size when stat fail', async () => { const now = new Date(); mockFiles(now); const statSpy = vi.spyOn(fs.promises, 'stat') as unknown as MockInstance<(path: PathLike) => Promise>; statSpy.mockImplementation((path: PathLike) => { if (`${path}`.endsWith('model-id-1')) throw new Error('random-error'); return Promise.resolve({ isDirectory: () => true } as Stats); }); let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const manager =
new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return [{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); expect(manager.getModelsInfo()).toEqual([ { id: 'model-id-1', name: 'model-id-1-model', file: { size: undefined, creation: undefined, path: path.resolve(dirent[0].parentPath, dirent[0].name), file: 'model-id-1-model', }, }, ]); });
test('getLocalModelsFromDisk should skip folders containing tmp files', async () => { const now = new Date(); mockFiles(now); const statSpy = vi.spyOn(fs.promises, 'stat') as unknown as MockInstance<(path: PathLike) => Promise>; statSpy.mockImplementation((path: PathLike) => { if (`${path}`.endsWith('model-id-1')) throw new Error('random-error'); return Promise.resolve({ isDirectory: () => true } as Stats); }); const readdirMock = vi.spyOn(fs.promises, 'readdir') as unknown as MockInstance< (path: string) => Promise >; readdirMock.mockImplementation((dir: string) => { if (dir.endsWith('model-id-1') || dir.endsWith('model-id-2')) { const base = path.basename(dir); return Promise.resolve([base + '-model.tmp']); } else { return Promise.resolve(dirent); } }); let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return [{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new
URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); expect(manager.getModelsInfo()).toEqual([ { id: 'model-id-1', name: 'model-id-1-model', }, ]); });
test('loadLocalModels should post a message with the message on disk and on catalog', async () => { const now = new Date(); mockFiles(now); let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const manager = new ModelsManager( rpcExtensionMock, { getModels: () => { return [ { id: 'model-id-1', }, ] as ModelInfo[]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(2, MSG_NEW_MODELS_STATE, [ { file: { creation: now, file: 'model-id-1-model', size: 32000, path: path.resolve(dirent[0].parentPath, dirent[0].name), }, id: 'model-id-1', }, ]); });
test('deleteModel deletes the model folder', async () => { let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const now = new Date(); mockFiles(now); const rmSpy = vi.spyOn(fs.promises, 'rm'); rmSpy.mockResolvedValue(); const manager = new ModelsManager( rpcExtensionMock, { getModels: () => { return [ { id: 'model-id-1', url: 'https:///model-url', }, ] as ModelInfo[]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); await
manager.deleteModel('model-id-1'); // check that the model's folder is removed from disk
if (process.platform === 'win32') { expect(rmSpy).toBeCalledWith('C:\\home\\user\\aistudio\\models\\model-id-1', { recursive: true, force: true, maxRetries: 3, }); } else { expect(rmSpy).toBeCalledWith('/home/user/aistudio/models/model-id-1', { recursive: true, force: true, maxRetries: 3, }); } expect(rpcExtensionMock.fire).toHaveBeenCalledTimes(5); // check that a new state is sent with the model removed
expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(4, MSG_NEW_MODELS_STATE, [ { id: 'model-id-1', url: 'https:///model-url', }, ]); expect(mocks.logUsageMock).toHaveBeenNthCalledWith(1, 'model.delete', { 'model.id': expect.any(String) }); });
// Deletion error handling and Windows/WSL-specific remote cleanup.
describe('deleting models', () => { test('deleteModel fails to delete the model folder', async () => { let modelsDir: string; if (process.platform === 'win32') { modelsDir = 'C:\\home\\user\\aistudio\\models'; } else { modelsDir = '/home/user/aistudio/models'; } const now = new Date(); mockFiles(now); const rmSpy = vi.spyOn(fs.promises, 'rm'); rmSpy.mockRejectedValue(new Error('failed')); const manager = new ModelsManager( rpcExtensionMock, { getModels: () => { return [ { id: 'model-id-1', url: 'https://model-url', }, ] as ModelInfo[]; }, onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir)); await manager.init(); await manager.loadLocalModels(); await manager.deleteModel('model-id-1'); // check that the model's folder is removed from disk
if (process.platform === 'win32') { expect(rmSpy).toBeCalledWith('C:\\home\\user\\aistudio\\models\\model-id-1', { recursive: true, force: true, maxRetries: 3, }); } else { expect(rmSpy).toBeCalledWith('/home/user/aistudio/models/model-id-1', { recursive: true, force: true, maxRetries: 3, }); }
expect(rpcExtensionMock.fire).toHaveBeenCalledTimes(5); // check that a new state is sent with the model non removed
expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(4, MSG_NEW_MODELS_STATE, [ { id: 'model-id-1', url: 'https://model-url', file: { creation: now, file: 'model-id-1-model', size: 32000, path: path.resolve(dirent[0].parentPath, dirent[0].name), }, }, ]); expect(mocks.showErrorMessageMock).toHaveBeenCalledOnce(); expect(mocks.logErrorMock).toHaveBeenCalled(); });
test('delete local model should call catalogManager', async () => { vi.mocked(env).isWindows = false; const removeUserModelMock = vi.fn(); const manager = new ModelsManager( rpcExtensionMock, { getModels: () => { return [ { id: 'model-id-1', file: { file: 'model-id-1-model', size: 32000, path: path.resolve(dirent[0].parentPath, dirent[0].name), }, }, ] as ModelInfo[]; }, removeUserModel: removeUserModelMock, } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.loadLocalModels(); await manager.deleteModel('model-id-1'); expect(removeUserModelMock).toBeCalledWith('model-id-1'); });
test('deleting on windows should check for all connections', async () => { vi.mocked(coreProcess.exec).mockResolvedValue({} as RunResult); mocks.getPodmanCliMock.mockReturnValue('dummyCli'); vi.mocked(env).isWindows = true; const connections: ContainerProviderConnection[] = [ { name: 'Machine 1', type: 'podman', vmType: VMType.HYPERV, endpoint: { socketPath: '', }, status: () => 'started', }, { name: 'Machine 2', type: 'podman', vmType: VMType.WSL, endpoint: { socketPath: '', }, status: () => 'started', }, ]; vi.mocked(podmanConnectionMock.getContainerProviderConnections).mockReturnValue(connections); vi.mocked(getPodmanMachineName).mockReturnValue('machine-2'); const rmSpy = vi.spyOn(fs.promises, 'rm'); rmSpy.mockResolvedValue(undefined); const manager = new ModelsManager(
rpcExtensionMock, { getModels: () => { return [ { id: 'model-id-1', url: 'model-url', file: { file: 'dummyFile', path: 'dummyPath', }, }, ] as ModelInfo[]; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.loadLocalModels(); // delete the model
await manager.deleteModel('model-id-1'); expect(podmanConnectionMock.getContainerProviderConnections).toHaveBeenCalledOnce(); expect(coreProcess.exec).toHaveBeenCalledWith('dummyCli', [ 'machine', 'ssh', 'machine-2', 'rm', '-f', '/home/user/ai-lab/models/model-id-1', ]); }); });
// Download orchestration: task lifecycle, sha validation, request deduplication.
describe('downloadModel', () => { test('download model if not already on disk', async () => { vi.mocked(cancellationTokenRegistryMock.createCancellationTokenSource).mockReturnValue(99); const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir')); vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false); vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99); const updateTaskMock = vi.spyOn(taskRegistry, 'updateTask'); await manager.requestDownloadModel({ id: 'id', url: 'https:///url', name: 'name', } as ModelInfo); expect(cancellationTokenRegistryMock.createCancellationTokenSource).toHaveBeenCalled(); expect(updateTaskMock).toHaveBeenLastCalledWith({ id: expect.any(String), name: 'Downloading model name', labels: { 'model-pulling': 'id', }, state: 'loading', cancellationToken: 99, }); });
test('retrieve model path if already on disk', async () => { const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock,
configurationRegistryMock, modelHandlerRegistry, ); const updateTaskMock = vi.spyOn(taskRegistry, 'updateTask'); vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true); const getLocalModelPathMock = vi.spyOn(manager, 'getLocalModelPath').mockReturnValue(''); await manager.requestDownloadModel({ id: 'id', url: 'url', name: 'name', } as ModelInfo); expect(getLocalModelPathMock).toBeCalledWith('id'); expect(updateTaskMock).toHaveBeenLastCalledWith({ id: expect.any(String), name: 'Model name already present on disk', labels: { 'model-pulling': 'id', }, state: 'success', }); });
test('fail if model on disk has different sha of the expected value', async () => { const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); vi.spyOn(taskRegistry, 'updateTask'); vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true); vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('path'); vi.spyOn(sha, 'hasValidSha').mockResolvedValue(false); await expect(() => manager.requestDownloadModel({ id: 'id', url: 'url', name: 'name', sha256: 'sha', } as ModelInfo), ).rejects.toThrowError( 'Model name is already present on disk at path but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.', ); });
test('multiple download request same model - second call after first completed', async () => { mocks.getDownloaderCompleter.mockReturnValue(true); const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir')); vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false); vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99); await manager.requestDownloadModel({ id: 'id', url: 'https:///url', name: 'name', } as ModelInfo); await manager.requestDownloadModel({ id: 'id', url: 'https:///url', name: 'name', } as ModelInfo); // Only called once
expect(mocks.performDownloadMock).toHaveBeenCalledTimes(1); expect(mocks.onEventDownloadMock).toHaveBeenCalledTimes(1); });
test('multiple download request same model - second call before first completed', async () => { mocks.getDownloaderCompleter.mockReturnValue(false); const manager = new ModelsManager( rpcExtensionMock, { getModels(): ModelInfo[] { return []; }, } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir')); vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false); vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99); mocks.onEventDownloadMock.mockImplementation(listener => { setTimeout(() => { listener({ id: 'id', status: 'completed', duration: 1000, }); }, 1000); return { dispose: vi.fn(), }; }); await manager.requestDownloadModel({ id: 'id', url: 'https:///url', name: 'name', } as ModelInfo); await manager.requestDownloadModel({ id: 'id', url: 'https:///url', name: 'name', } as ModelInfo); // Only called once
expect(mocks.performDownloadMock).toHaveBeenCalledTimes(1); expect(mocks.onEventDownloadMock).toHaveBeenCalledTimes(2); }); });
// Metadata extraction via @huggingface/gguf, for both remote and local models.
describe('getModelMetadata', () => { test('unknown model', async () => { const manager = new ModelsManager( rpcExtensionMock, { getModels: (): ModelInfo[] => [], } as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await expect(() => manager.getModelMetadata('unknown-model-id')).rejects.toThrowError( 'model with id unknown-model-id does not exists.', ); });
test('remote model', async () => { const manager = new ModelsManager( {} as RpcExtension, { getModels: (): ModelInfo[] => [ { id: 'test-model-id', url: 'dummy-url', file: undefined, } as unknown as ModelInfo, ], onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.init(); const fakeMetadata: Record = { hello: 'world', }; vi.mocked(gguf).mockResolvedValue({ metadata: fakeMetadata, } as unknown as GGUFParseOutput & { parameterCount: number }); const result = await manager.getModelMetadata('test-model-id'); expect(result).toStrictEqual(fakeMetadata); expect(gguf).toHaveBeenCalledWith('dummy-url'); });
test('local model', async () => { const manager = new ModelsManager( rpcExtensionMock, { getModels: (): ModelInfo[] => [ { id: 'test-model-id', url: 'dummy-url', file: { file: 'random', path: 'dummy-path', }, } as unknown as ModelInfo, ], onUpdate: vi.fn(), } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.init(); const fakeMetadata: Record = { hello: 'world', }; vi.mocked(gguf).mockResolvedValue({ metadata: fakeMetadata, } as unknown as GGUFParseOutput & { parameterCount: number }); const result = await
manager.getModelMetadata('test-model-id'); expect(result).toStrictEqual(fakeMetadata); expect(gguf).toHaveBeenCalledWith(path.join('dummy-path', 'random'), { allowLocalFile: true, }); }); });
// Fixtures shared by the upload tests below.
const connectionMock: ContainerProviderConnection = { name: 'dummy-connection', type: 'podman', vmType: undefined, } as unknown as ContainerProviderConnection; const modelMock: ModelInfo = { id: 'test-model-id', url: 'dummy-url', file: { file: 'random', path: 'dummy-path', }, } as unknown as ModelInfo;
describe('uploadModelToPodmanMachine', () => { test('uploader should be used', async () => { const performMock = vi.fn().mockResolvedValue('uploader-result'); vi.mocked(Uploader).mockReturnValue({ onEvent: vi.fn(), perform: performMock, } as unknown as Uploader); const manager = new ModelsManager( rpcExtensionMock, { onUpdate: vi.fn(), getModels: () => [], } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.init(); const result = await manager.uploadModelToPodmanMachine(connectionMock, modelMock); expect(result).toBe('uploader-result'); expect(performMock).toHaveBeenCalledWith(modelMock.id); });
test('upload should be skipped when configuration disable it', async () => { vi.mocked(configurationRegistryMock.getExtensionConfiguration).mockReturnValue({ // disable upload
modelUploadDisabled: true, modelsPath: '~/downloads', experimentalTuning: false, apiPort: 0, inferenceRuntime: 'llama-cpp', experimentalGPU: false, showGPUPromotion: false, appearance: 'dark', }); const manager = new ModelsManager( rpcExtensionMock, { onUpdate: vi.fn(), getModels: () => [], } as unknown as CatalogManager, telemetryLogger, taskRegistry, cancellationTokenRegistryMock, podmanConnectionMock, configurationRegistryMock, modelHandlerRegistry, ); await manager.init(); await manager.uploadModelToPodmanMachine(connectionMock, modelMock); expect(Uploader).not.toHaveBeenCalled();
}); }); ================================================ FILE: packages/backend/src/managers/modelsManager.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { LocalModelInfo } from '@shared/models/ILocalModelInfo'; import fs from 'node:fs'; import * as path from 'node:path'; import { type Disposable, env, type ContainerProviderConnection } from '@podman-desktop/api'; import { MSG_NEW_MODELS_STATE } from '@shared/Messages'; import type { CatalogManager } from './catalogManager'; import type { ModelInfo } from '@shared/models/IModelInfo'; import * as podmanDesktopApi from '@podman-desktop/api'; import type { Downloader } from '../utils/downloader'; import type { TaskRegistry } from '../registries/TaskRegistry'; import type { Task } from '@shared/models/ITask'; import type { BaseEvent } from '../models/baseEvent'; import { isCompletionEvent, isProgressEvent } from '../models/baseEvent'; import { Uploader } from '../utils/uploader'; import { deleteRemoteModel, getLocalModelFile, isModelUploaded } from '../utils/modelsUtils'; import { getPodmanMachineName } from '../utils/podman'; import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry'; import { getHash, hasValidSha }
from '../utils/sha'; import type { GGUFParseOutput } from '@huggingface/gguf'; import { gguf } from '@huggingface/gguf'; import type { PodmanConnection } from './podmanConnection'; import { VMType } from '@shared/models/IPodman'; import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry'; import type { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry'; import type { RpcExtension } from '@shared/messages/MessageProxy'; export class ModelsManager implements Disposable { #models: Map; #disposables: Disposable[]; #downloaders: Map = new Map(); constructor( private rpcExtension: RpcExtension, private catalogManager: CatalogManager, private telemetry: podmanDesktopApi.TelemetryLogger, private taskRegistry: TaskRegistry, private cancellationTokenRegistry: CancellationTokenRegistry, private podmanConnection: PodmanConnection, private configurationRegistry: ConfigurationRegistry, private modelHandlerRegistry: ModelHandlerRegistry, ) { this.#models = new Map(); this.#disposables = []; this.modelHandlerRegistry.getAll().forEach(handler => handler.onUpdate(this.loadLocalModels)); } async init(): Promise { const disposable = this.catalogManager.onUpdate(() => { this.loadLocalModels().catch((err: unknown) => { console.error(`Something went wrong when loading local models`, err); }); }); this.#disposables.push(disposable); try { await this.loadLocalModels(); } catch (err: unknown) { console.error('Something went wrong while trying to load local models', err); } } dispose(): void { this.#models.clear(); this.#disposables.forEach(d => d.dispose()); } async loadLocalModels(): Promise { this.#models.clear(); this.catalogManager.getModels().forEach(m => this.#models.set(m.id, m)); const reloadLocalModels = async (): Promise => { await this.getLocalModelsFromDisk(); await this.sendModelsInfo(); }; // Initialize the local models manually await reloadLocalModels(); } getModelsInfo(): ModelInfo[] { return [...this.#models.values()]; } async 
sendModelsInfo(): Promise { const models = this.getModelsInfo(); await this.rpcExtension.fire(MSG_NEW_MODELS_STATE, models); } async getLocalModelsFromDisk(): Promise { return Promise.all(this.modelHandlerRegistry.getAll().map(registry => registry.getLocalModelsFromDisk())).then( () => void 0, ); } isModelOnDisk(modelId: string): boolean { return this.#models.get(modelId)?.file !== undefined; } getLocalModelInfo(modelId: string): LocalModelInfo { const model = this.#models.get(modelId); if (!model?.file) { throw new Error('model is not on disk'); } return model.file; } getModelInfo(modelId: string): ModelInfo { const model = this.#models.get(modelId); if (!model) { throw new Error('model is not loaded'); } return model; } getLocalModelPath(modelId: string): string { return getLocalModelFile(this.getModelInfo(modelId)); } async deleteModel(modelId: string): Promise { const model = this.#models.get(modelId); if (!model?.file) { throw new Error('model cannot be found.'); } model.state = 'deleting'; await this.sendModelsInfo(); try { await this.deleteRemoteModel(model); // if model does not have any url, it has been imported locally by the user if (!model.url) { const modelPath = path.join(model.file.path, model.file.file); // remove it from the catalog as it cannot be downloaded anymore await this.catalogManager.removeUserModel(modelId); await fs.promises.rm(modelPath, { recursive: true, force: true, maxRetries: 3 }); } else { const modelHandler = this.modelHandlerRegistry.findModelHandler(model.url); if (!modelHandler) { throw new Error(`no model registry found for model ${model.id} url ${model.url}`); } await modelHandler.deleteModel(model); } this.telemetry.logUsage('model.delete', { 'model.id': getHash(modelId) }); model.file = model.state = undefined; } catch (err: unknown) { this.telemetry.logError('model.delete', { 'model.id': modelId, message: 'error deleting model from disk', error: err, }); await podmanDesktopApi.window.showErrorMessage(`Error deleting model 
${modelId}. ${String(err)}`); // Let's reload the models manually to avoid any issue model.state = undefined; await this.getLocalModelsFromDisk(); } finally { await this.sendModelsInfo(); } } private async deleteRemoteModel(modelInfo: ModelInfo): Promise { // currently only Window is supported if (!env.isWindows) { return; } // get all container provider connections const connections = this.podmanConnection.getContainerProviderConnections(); // iterate over all connections for (const connection of connections) { // ignore non-wsl machines if (connection.vmType !== VMType.WSL) continue; // Get the corresponding machine name const machineName = getPodmanMachineName(connection); // check if model already loaded on the podman machine const existsRemote = await isModelUploaded(machineName, modelInfo); if (!existsRemote) return; await deleteRemoteModel(machineName, modelInfo); } } /** * This method will resolve when the provided model will be downloaded. * * This can method can be call multiple time for the same model, it will reuse existing downloader and wait on * their completion. 
* @param model * @param labels */ async requestDownloadModel(model: ModelInfo, labels?: { [key: string]: string }): Promise { // Create a task to follow progress const task: Task = this.createDownloadTask(model, labels); // Check there is no existing downloader running const existingDownloader = this.#downloaders.get(model.id); if (!existingDownloader) { return this.downloadModel(model, task); } if (existingDownloader.completed) { task.state = 'success'; this.taskRegistry.updateTask(task); return existingDownloader.getTarget(); } // Propagate cancellation token from existing task to the new one task.cancellationToken = this.taskRegistry.findTaskByLabels({ 'model-pulling': model.id })?.cancellationToken; this.taskRegistry.updateTask(task); // If we have an existing downloader running we subscribe on its events return new Promise((resolve, reject) => { const disposable = existingDownloader.onEvent(event => { if (!isCompletionEvent(event)) return; switch (event.status) { case 'completed': resolve(existingDownloader.getTarget()); break; default: reject(new Error(event.message)); } disposable.dispose(); }); }); } private async onDownloadUploadEvent(event: BaseEvent, action: 'download' | 'upload'): Promise { let taskLabel = 'model-pulling'; let eventName = 'model.download'; if (action === 'upload') { taskLabel = 'model-uploading'; eventName = 'model.upload'; } // Always use the task registry as source of truth for tasks const tasks = this.taskRegistry.getTasksByLabels({ [taskLabel]: event.id }); if (tasks.length === 0) { // tasks might have been cleared but still an error. 
console.error(`received ${action} event but no task is associated.`); return; } for (const task of tasks) { if (isProgressEvent(event)) { task.state = 'loading'; task.progress = event.value; } else if (isCompletionEvent(event)) { // status error or canceled if (event.status === 'error' || event.status === 'canceled') { task.state = 'error'; task.progress = undefined; task.error = event.message; // telemetry usage this.telemetry.logError(eventName, { 'model.id': event.id, message: `error ${action}ing model`, error: event.message, durationSeconds: event.duration, }); } else { task.state = 'success'; task.progress = 100; // telemetry usage this.telemetry.logUsage(eventName, { 'model.id': event.id, durationSeconds: event.duration }); } // cleanup downloader this.#downloaders.delete(event.id); } this.taskRegistry.updateTask(task); // update task } } public createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader { if (!model.url) { throw new Error(`model ${model.id} does not have url defined.`); } const modelHandler = this.modelHandlerRegistry.findModelHandler(model.url); if (!modelHandler) { throw new Error(`no model registry found for model ${model.id} url ${model.url}`); } // Create a downloader const downloader = modelHandler.createDownloader(model, abortSignal); this.#downloaders.set(model.id, downloader); return downloader; } private createDownloadTask(model: ModelInfo, labels?: { [key: string]: string }): Task { // it may happen that the taskRegistry contains old entries representing an old failing download, we delete them as we are starting a new download const failedPullingTaskIds = this.taskRegistry .getTasksByLabels({ 'model-pulling': model.id, }) .filter(t => t.state === 'error') .map(t => t.id); if (failedPullingTaskIds.length > 0) { this.taskRegistry.deleteAll(failedPullingTaskIds); } return this.taskRegistry.createTask(`Downloading model ${model.name}`, 'loading', { ...labels, 'model-pulling': model.id, }); } private async 
downloadModel(model: ModelInfo, task: Task): Promise { // Check if the model is already on disk. if (this.isModelOnDisk(model.id)) { task.name = `Model ${model.name} already present on disk`; const modelPath = this.getLocalModelPath(model.id); if (model.sha256) { const isValid = await hasValidSha(modelPath, model.sha256); if (!isValid) { task.state = 'error'; task.error = `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`; this.taskRegistry.updateTask(task); // update task throw new Error( `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`, ); } } task.state = 'success'; this.taskRegistry.updateTask(task); // update task // return model path return modelPath; } const abortController = new AbortController(); task.cancellationToken = this.cancellationTokenRegistry.createCancellationTokenSource(() => { abortController.abort('Cancel'); }); // update task to loading state this.taskRegistry.updateTask(task); const downloader = this.createDownloader(model, abortController.signal); // Capture downloader events downloader.onEvent(event => this.onDownloadUploadEvent(event, 'download'), this); // perform download await downloader.perform(model.id); await this.updateModelInfos(); return downloader.getTarget(); } async uploadModelToPodmanMachine( connection: ContainerProviderConnection, model: ModelInfo, labels?: { [key: string]: string }, ): Promise { // ensure the model upload is not disabled if (this.configurationRegistry.getExtensionConfiguration().modelUploadDisabled) { console.warn('The model upload is disabled, this may cause the inference server to take a few minutes to start.'); return getLocalModelFile(model); } 
this.taskRegistry.createTask(`Copying model ${model.name} to ${connection.name}`, 'loading', { ...labels, 'model-uploading': model.id, connection: connection.name, }); const uploader = new Uploader(connection, model); uploader.onEvent(event => this.onDownloadUploadEvent(event, 'upload'), this); // perform download const path = uploader.perform(model.id); await this.updateModelInfos(); return path; } private async updateModelInfos(): Promise { // refresh model lists on event completion await this.getLocalModelsFromDisk(); this.sendModelsInfo().catch((err: unknown) => { console.error('Something went wrong while sending models info.', err); }); } async getModelMetadata(modelId: string): Promise> { const model = this.#models.get(modelId); if (!model) throw new Error(`model with id ${modelId} does not exists.`); const before = performance.now(); const data: Record = { 'model-id': getHash(modelId), }; try { let result: GGUFParseOutput<{ strict: false }>; if (this.isModelOnDisk(modelId)) { const modelPath = path.normalize(getLocalModelFile(model)); result = await gguf(modelPath, { allowLocalFile: true }); } else if (model.url) { result = await gguf(model.url); } else { throw new Error('cannot get model metadata'); } return result.metadata; } catch (err: unknown) { data['error'] = err; console.error(err); throw err; } finally { data['duration'] = performance.now() - before; this.telemetry.logUsage('get-metadata', data); } } } ================================================ FILE: packages/backend/src/managers/monitoringManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, afterEach, test, vi } from 'vitest'; import { MonitoringManager } from './monitoringManager'; import { containerEngine, type ContainerStatsInfo, type Disposable } from '@podman-desktop/api'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_MONITORING_UPDATE } from '@shared/Messages'; vi.mock('@podman-desktop/api', async () => { return { containerEngine: { statsContainer: vi.fn(), }, }; }); const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension; beforeEach(() => { vi.resetAllMocks(); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); vi.mocked(containerEngine.statsContainer).mockResolvedValue({} as unknown as Disposable); vi.useFakeTimers(); }); afterEach(() => { vi.useRealTimers(); }); function simplifiedCallback(callback: (arg: ContainerStatsInfo) => void, cpu: number, ram: number): void { callback({ cpu_stats: { cpu_usage: { total_usage: cpu, }, }, memory_stats: { usage: ram, }, } as unknown as ContainerStatsInfo); } test('expect constructor to do nothing', () => { const manager = new MonitoringManager(rpcExtensionMock); expect(containerEngine.statsContainer).not.toHaveBeenCalled(); expect(manager.getStats().length).toBe(0); expect(rpcExtensionMock.fire).not.toHaveBeenCalled(); }); test('expect monitor method to start stats container', async () => { const manager = new MonitoringManager(rpcExtensionMock); await manager.monitor('randomContainerId', 'dummyEngineId'); 
expect(containerEngine.statsContainer).toHaveBeenCalledWith('dummyEngineId', 'randomContainerId', expect.anything()); }); test('expect monitor method to start stats container', async () => { const manager = new MonitoringManager(rpcExtensionMock); await manager.monitor('randomContainerId', 'dummyEngineId'); expect(containerEngine.statsContainer).toHaveBeenCalledWith('dummyEngineId', 'randomContainerId', expect.anything()); }); test('expect dispose to dispose stats container', async () => { const manager = new MonitoringManager(rpcExtensionMock); const fakeDisposable = vi.fn(); vi.mocked(containerEngine.statsContainer).mockResolvedValue({ dispose: fakeDisposable, }); await manager.monitor('randomContainerId', 'dummyEngineId'); manager.dispose(); expect(fakeDisposable).toHaveBeenCalled(); }); test('expect webview to be notified when statsContainer call back', async () => { const manager = new MonitoringManager(rpcExtensionMock); let mCallback: ((stats: ContainerStatsInfo) => void) | undefined; vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => { mCallback = callback; return { dispose: (): void => {} }; }); await manager.monitor('randomContainerId', 'dummyEngineId'); await vi.waitFor(() => { expect(mCallback).toBeDefined(); }); if (!mCallback) throw new Error('undefined mCallback'); const date = new Date(2000, 1, 1, 13); vi.setSystemTime(date); simplifiedCallback(mCallback, 123, 99); expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_MONITORING_UPDATE, [ { containerId: 'randomContainerId', stats: [ { timestamp: Date.now(), cpu_usage: 123, memory_usage: 99, }, ], }, ]); }); test('expect stats to cumulate', async () => { const manager = new MonitoringManager(rpcExtensionMock); let mCallback: ((stats: ContainerStatsInfo) => void) | undefined; vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => { mCallback = callback; return { dispose: (): void => {} }; }); await 
manager.monitor('randomContainerId', 'dummyEngineId'); await vi.waitFor(() => { expect(mCallback).toBeDefined(); }); if (!mCallback) throw new Error('undefined mCallback'); simplifiedCallback(mCallback, 0, 0); simplifiedCallback(mCallback, 1, 1); simplifiedCallback(mCallback, 2, 2); simplifiedCallback(mCallback, 3, 3); const stats = manager.getStats(); expect(stats.length).toBe(1); expect(stats[0].stats.length).toBe(4); }); test('expect old stats to be removed', async () => { const manager = new MonitoringManager(rpcExtensionMock); let mCallback: ((stats: ContainerStatsInfo) => void) | undefined; vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => { mCallback = callback; return { dispose: (): void => {} }; }); await manager.monitor('randomContainerId', 'dummyEngineId'); await vi.waitFor(() => { expect(mCallback).toBeDefined(); }); if (!mCallback) throw new Error('undefined mCallback'); vi.setSystemTime(new Date(2000, 1, 1, 13)); simplifiedCallback(mCallback, 0, 0); vi.setSystemTime(new Date(2005, 1, 1, 13)); simplifiedCallback(mCallback, 1, 1); simplifiedCallback(mCallback, 2, 2); simplifiedCallback(mCallback, 3, 3); const stats = manager.getStats(); expect(stats.length).toBe(1); expect(stats[0].stats.length).toBe(3); }); test('expect stats to be disposed if stats result is an error', async () => { const manager = new MonitoringManager(rpcExtensionMock); let mCallback: ((stats: ContainerStatsInfo) => void) | undefined; const fakeDisposable = vi.fn(); vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => { mCallback = callback; return { dispose: fakeDisposable }; }); await manager.monitor('randomContainerId', 'dummyEngineId'); await vi.waitFor(() => { expect(mCallback).toBeDefined(); }); if (!mCallback) throw new Error('undefined mCallback'); mCallback({ cause: 'container is stopped' } as unknown as ContainerStatsInfo); const stats = manager.getStats(); 
expect(stats.length).toBe(0); expect(fakeDisposable).toHaveBeenCalled(); }); ================================================ FILE: packages/backend/src/managers/monitoringManager.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { type Disposable, containerEngine, type ContainerStatsInfo } from '@podman-desktop/api'; import { Publisher } from '../utils/Publisher'; import { MSG_MONITORING_UPDATE } from '@shared/Messages'; import type { RpcExtension } from '@shared/messages/MessageProxy'; export interface StatsInfo { timestamp: number; cpu_usage: number; memory_usage: number; } export interface StatsHistory { containerId: string; stats: StatsInfo[]; } export const MAX_AGE: number = 5 * 60 * 1000; // 5 minutes export class MonitoringManager extends Publisher implements Disposable { #containerStats: Map; #disposables: Disposable[]; constructor(rpcExtension: RpcExtension) { super(rpcExtension, MSG_MONITORING_UPDATE, () => this.getStats()); this.#containerStats = new Map(); this.#disposables = []; } async monitor(containerId: string, engineId: string): Promise { const disposable = await containerEngine.statsContainer(engineId, containerId, statsInfo => { if ('cause' in statsInfo) { console.error('Cannot stats 
container', statsInfo.cause); disposable.dispose(); } else { this.push(containerId, statsInfo); } }); this.#disposables.push(disposable); return disposable; } private push(containerId: string, statsInfo: ContainerStatsInfo): void { let stats: StatsInfo[] = []; const statsHistory = this.#containerStats.get(containerId); if (statsHistory) { const limit = Date.now() - MAX_AGE; stats = statsHistory.stats.filter(stats => stats.timestamp > limit); } this.#containerStats.set(containerId, { containerId: containerId, stats: [ ...stats, { timestamp: Date.now(), cpu_usage: statsInfo.cpu_stats.cpu_usage.total_usage, memory_usage: statsInfo.memory_stats.usage, }, ], }); this.notify(); } clear(containerId: string): void { this.#containerStats.delete(containerId); } getStats(): StatsHistory[] { return Array.from(this.#containerStats.values()); } dispose(): void { this.#disposables.forEach(disposable => disposable.dispose()); } } ================================================ FILE: packages/backend/src/managers/playground/McpServerManager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import path from 'node:path'; import { type RpcExtension } from '@shared/messages/MessageProxy'; import { type McpClient, type McpServer, McpServerType, type McpSettings } from '@shared/models/McpSettings'; import { McpServerManager } from './McpServerManager'; import { JsonWatcher } from '../../utils/JsonWatcher'; import { toMcpClients } from '../../utils/mcpUtils'; vi.mock('../../utils/JsonWatcher'); vi.mock('../../utils/mcpUtils'); const mockJsonWatcher = { init: vi.fn(), dispose: vi.fn(), onContentUpdated: vi.fn((fn: (mcpSettings: McpSettings) => void) => (update = fn)), } as unknown as JsonWatcher; const rpcExtension = { fire: vi.fn(() => Promise.resolve(true)) } as unknown as RpcExtension; let update: (mcpSettings: McpSettings) => void; let appUserDirectory: string; let mcpServerManager: McpServerManager; beforeEach(async () => { vi.resetAllMocks(); vi.mocked(JsonWatcher).mockReturnValue(mockJsonWatcher); vi.mocked(toMcpClients).mockImplementation(async (...mcpServers) => mcpServers.map(s => ({ name: s.name }) as unknown as McpClient), ); appUserDirectory = path.join('/', 'tmp', 'mcp-server-manager-test-'); mcpServerManager = new McpServerManager(rpcExtension, appUserDirectory); }); test('provides an empty default value', () => { expect(mcpServerManager.getMcpSettings()).toEqual({ servers: {} }); }); test('init initializes the watcher', () => { mcpServerManager.init(); expect(mockJsonWatcher.init).toHaveBeenCalled(); }); test('dispose disposes the watcher', () => { mcpServerManager.dispose(); expect(mockJsonWatcher.dispose).toHaveBeenCalled(); }); describe('when loading mcp-settings.json', () => { beforeEach(() => { const mcpSettings = { servers: { 'stdio-ok': { enabled: true, type: 'stdio', command: 'npx', args: ['-y', 'kubernetes-mcp-server'], }, 'sse-ok': { enabled: true, type: 
'sse', url: 'https://echo.example.com/sse', headers: { foo: 'bar', }, }, 'invalid-type': { enabled: true, type: 'invalid', url: 'https://echo.example.com/sse', }, }, } as unknown as McpSettings; update(mcpSettings); }); test('loads valid servers', () => { expect(mcpServerManager.getMcpSettings().servers).toEqual( expect.objectContaining({ 'stdio-ok': { enabled: true, name: 'stdio-ok', type: McpServerType.STDIO, command: 'npx', args: ['-y', 'kubernetes-mcp-server'], }, 'sse-ok': { enabled: true, name: 'sse-ok', type: McpServerType.SSE, url: 'https://echo.example.com/sse', headers: { foo: 'bar' }, }, }), ); }); test('ignores invalid servers', () => { expect(mcpServerManager.getMcpSettings().servers['invalid-type']).toBeUndefined(); }); }); test('toMcpClients returns the enabled servers', async () => { mcpServerManager.init(); update({ servers: { enabled: { enabled: true, type: McpServerType.STDIO } as unknown as McpServer, disabled: { enabled: false, type: McpServerType.STDIO } as unknown as McpServer, }, }); const mcpClients = await mcpServerManager.toMcpClients(); expect(mcpClients).toEqual([{ name: 'enabled' }]); }); ================================================ FILE: packages/backend/src/managers/playground/McpServerManager.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import path from 'node:path'; import { type Disposable } from '@podman-desktop/api'; import { MSG_MCP_SERVERS_UPDATE } from '@shared/Messages'; import { type McpSettings, McpServerType, type McpClient } from '@shared/models/McpSettings'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { JsonWatcher } from '../../utils/JsonWatcher'; import { Publisher } from '../../utils/Publisher'; import { toMcpClients } from '../../utils/mcpUtils'; // TODO: Agree on the name of the file and its location const MCP_SETTINGS = 'mcp-settings.json'; export class McpServerManager extends Publisher implements Disposable { private readonly settingsFile: string; private mcpSettings: McpSettings; readonly #jsonWatcher: JsonWatcher; constructor( rpcExtension: RpcExtension, private appUserDirectory: string, ) { super(rpcExtension, MSG_MCP_SERVERS_UPDATE, () => this.getMcpSettings()); this.settingsFile = path.join(this.appUserDirectory, MCP_SETTINGS); this.mcpSettings = { servers: {}, }; this.#jsonWatcher = new JsonWatcher(this.settingsFile, { ...this.mcpSettings }); this.#jsonWatcher.onContentUpdated(this.onMcpSettingsUpdated.bind(this)); } /** * Lazily initialize the MCP server manager dependencies. */ init(): void { this.#jsonWatcher.init(); } private onMcpSettingsUpdated(mcpSettings: McpSettings): void { this.mcpSettings = { servers: {} }; for (const [name, mcpServer] of Object.entries(mcpSettings.servers ?? 
{})) { mcpServer.name = name; if (!Object.values(McpServerType).includes(mcpServer.type)) { console.warn(`McpServerManager: Invalid MCP server type ${mcpServer.type} for server ${mcpServer.name}.`); continue; } this.mcpSettings.servers[name] = mcpServer; } this.notify(); } getMcpSettings(): McpSettings { return this.mcpSettings; } async toMcpClients(): Promise { const enabledServers = Object.values(this.mcpSettings.servers).filter(server => server.enabled); return toMcpClients(...enabledServers); } dispose(): void { this.#jsonWatcher.dispose(); } } ================================================ FILE: packages/backend/src/managers/playground/aiSdk.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { describe, test, expect, beforeEach, vi } from 'vitest'; import * as ai from 'ai'; import { MockLanguageModelV3 } from 'ai/test'; import { AiStreamProcessor, toCoreMessage } from './aiSdk'; import type { AssistantChat, ChatMessage, Conversation, ErrorMessage, Message, PendingChat, UserChat, } from '@shared/models/IPlaygroundMessage'; import type { LanguageModelV3, LanguageModelV2CallWarning, LanguageModelV3StreamPart, LanguageModelV3GenerateResult, } from '@ai-sdk/provider'; import { ConversationRegistry } from '../../registries/ConversationRegistry'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import type { ModelOptions } from '@shared/models/IModelOptions'; import type { ToolSet } from 'ai'; import { jsonSchema, simulateStreamingMiddleware, tool, wrapLanguageModel } from 'ai'; vi.mock('ai', async original => { const mod = (await original()) as object; return { ...mod }; }); /* eslint-disable sonarjs/no-nested-functions */ describe('aiSdk', () => { beforeEach(() => { vi.resetAllMocks(); }); describe('toCoreMessage', () => { test('with no fields', () => { const result = toCoreMessage({} as Message); expect(result).toEqual([]); }); test('with no role', () => { const result = toCoreMessage({ content: 'alex' } as ChatMessage); expect(result).toEqual([]); }); test('with no content', () => { const result = toCoreMessage({ role: 'user' } as ChatMessage); expect(result).toEqual([{ role: 'user', content: '' }]); }); test('with all fields', () => { const result = toCoreMessage({ role: 'user', content: 'alex' } as ChatMessage); expect(result).toEqual([{ role: 'user', content: 'alex' }]); }); test('with multiple messages', () => { const result = toCoreMessage( { role: 'user', content: 'alex' } as ChatMessage, { role: 'assistant', content: 'bob' } as ChatMessage, ); expect(result).toEqual([ { role: 'user', content: 'alex' }, { role: 
'assistant', content: 'bob' }, ]); }); test('with tool call messages', () => { const result = toCoreMessage( { role: 'user', content: 'alex' } as ChatMessage, { role: 'assistant', content: { type: 'tool-call', toolCallId: 'call-001', toolName: 'tool-1', args: {}, result: { content: [{ type: 'text', text: 'Success!!!' }], }, }, } as AssistantChat, { role: 'assistant', content: 'The call to the tool was a success!' } as AssistantChat, ); expect(result).toEqual([ { role: 'user', content: 'alex' }, { role: 'assistant', content: [ { type: 'tool-call', toolCallId: 'call-001', toolName: 'tool-1', input: {}, }, ], }, { role: 'tool', content: [ { type: 'tool-result', toolCallId: 'call-001', toolName: 'tool-1', output: { content: [{ type: 'text', text: 'Success!!!' }], }, }, ], }, { role: 'assistant', content: 'The call to the tool was a success!' }, ]); }); }); describe('AiStreamProcessor', () => { let conversationRegistry: ConversationRegistry; let conversationId: string; beforeEach(() => { const rpcExtension = { fire: vi.fn().mockResolvedValue(true), } as unknown as RpcExtension; conversationRegistry = new ConversationRegistry(rpcExtension); conversationId = conversationRegistry.createConversation('test-conversation', 'test-model'); conversationRegistry.submit(conversationId, { content: 'Aitana, please proceed with the test', role: 'user', id: conversationRegistry.getUniqueId(), timestamp: Date.now(), } as UserChat); }); test('sends model options', async () => { const streamTextSpy = vi.spyOn(ai, 'streamText'); const streamProcessor = new AiStreamProcessor(conversationId, conversationRegistry); const streamResult = streamProcessor.stream(createTestModel(), undefined, { temperature: 42, top_p: 13, max_tokens: 37, stream_options: { include_usage: true }, } as ModelOptions); await streamResult.consumeStream(); expect(streamTextSpy).toHaveBeenCalledWith( expect.objectContaining({ model: expect.anything(), temperature: 42, maxOutputTokens: 37, topP: 13, abortSignal: 
expect.any(AbortSignal), messages: expect.any(Array), onStepFinish: expect.any(Function), onError: expect.any(Function), onChunk: expect.any(Function), }), ); }); test('abort, completes the last assistant message', async () => { const incompleteMessageId = 'incomplete-message-id'; conversationRegistry.submit(conversationId, { id: incompleteMessageId, role: 'assistant', timestamp: Date.now(), choices: [], completed: undefined, } as PendingChat); const streamProcessor = new AiStreamProcessor(conversationId, conversationRegistry); streamProcessor['currentMessageId'] = incompleteMessageId; streamProcessor.abortController.abort('cancel'); expect(conversationRegistry.get(conversationId).messages).toHaveLength(2); expect((conversationRegistry.get(conversationId).messages[1] as AssistantChat).completed).not.toBeUndefined(); }); describe('with stream error', () => { beforeEach(async () => { // eslint-disable-next-line sonarjs/no-nested-functions const doStream: LanguageModelV3['doStream'] = async () => { throw new Error('The stream is kaput.'); }; const model = new MockLanguageModelV3({ doStream }); await new AiStreamProcessor(conversationId, conversationRegistry).stream(model).consumeStream(); }); test('appends a single message', () => { expect(conversationRegistry.get(conversationId).messages).toHaveLength(2); }); test('appended message is error', () => { expect((conversationRegistry.get(conversationId).messages[1] as ErrorMessage).error).toEqual( 'The stream is kaput.', ); }); }); describe('with single message stream', () => { let model: LanguageModelV3; beforeEach(async () => { model = createTestModel({ stream: ai.simulateReadableStream({ chunks: [ { type: 'response-metadata', id: 'id-0', modelId: 'mock-model-id', timestamp: new Date(0), }, { type: 'text-delta', id: 'id-1', delta: 'Greetings' }, { type: 'text-delta', id: 'id-2', delta: ' professor ' }, { type: 'text-delta', id: 'id-3', delta: `Falken` }, { type: 'finish', finishReason: { unified: 'stop', raw: undefined 
}, usage: { outputTokens: { total: 133, text: undefined, reasoning: undefined }, inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined }, totalTokens: 140, }, }, ], }), }); await new AiStreamProcessor(conversationId, conversationRegistry).stream(model).consumeStream(); }); test('appends a single message', () => { expect(conversationRegistry.get(conversationId).messages).toHaveLength(2); }); test('appended message is from assistant', () => { expect((conversationRegistry.get(conversationId).messages[1] as ChatMessage).role).toEqual('assistant'); }); test('concatenates message content', () => { expect((conversationRegistry.get(conversationId).messages[1] as ChatMessage).content).toEqual( 'Greetings professor Falken', ); }); test('setsUsage', async () => { const conversation = conversationRegistry.get(conversationId) as Conversation; expect(conversation?.usage?.completion_tokens).toEqual(133); expect(conversation?.usage?.prompt_tokens).toEqual(7); }); }); describe('with wrapped generated multiple messages as stream', () => { let model: LanguageModelV3; let tools: ToolSet; let generateStep: number; beforeEach(async () => { generateStep = 0; model = wrapLanguageModel({ model: new MockLanguageModelV3({ doGenerate: async (): Promise => { if (generateStep++ === 0) { return { content: [ { type: 'tool-call', toolCallId: 'call-001', toolName: 'tool-1', input: '{}', }, { type: 'tool-call', toolCallId: 'call-002', toolName: 'tool-1', input: '{}', }, ], finishReason: { unified: 'tool-calls', raw: undefined }, usage: { inputTokens: { total: 1, noCache: undefined, cacheRead: undefined, cacheWrite: undefined }, outputTokens: { total: 1, text: undefined, reasoning: undefined }, }, warnings: [], }; } return { content: [ { type: 'text', text: 'These are the results of you functions: huge success!', }, ], finishReason: { unified: 'stop', raw: undefined }, usage: { inputTokens: { total: 133, noCache: undefined, cacheRead: undefined, cacheWrite: 
undefined }, outputTokens: { total: 7, text: undefined, reasoning: undefined }, }, warnings: [], }; }, }), middleware: simulateStreamingMiddleware(), }); tools = { 'tool-1': tool({ inputSchema: jsonSchema({ type: 'object' }), execute: async () => 'successful result!', }), }; await new AiStreamProcessor(conversationId, conversationRegistry).stream(model, tools).consumeStream(); }); test('appends multiple messages', () => { expect(conversationRegistry.get(conversationId).messages).toHaveLength(4); }); test.each<{ index: number; toolCallId: string }>([ { index: 1, toolCallId: 'call-001' }, { index: 2, toolCallId: 'call-002' }, ])(`appends tool call (to tool-1) message at $index`, ({ index, toolCallId }) => { const message = conversationRegistry.get(conversationId).messages[index] as AssistantChat; expect(message.role).toEqual('assistant'); expect(message.content).toMatchObject({ type: 'tool-call', toolCallId, toolName: 'tool-1', args: {}, }); }); test.each<{ index: number; id: string; toolCallId: string }>([ { index: 1, id: '3', toolCallId: 'call-001' }, { index: 2, id: '4', toolCallId: 'call-002' }, ])(`sets tool result message at $index for $toolCallId`, ({ index, id, toolCallId }) => { const message = conversationRegistry.get(conversationId).messages[index] as AssistantChat; expect(message.id).toEqual(id); expect(message.timestamp).toBeDefined(); expect(message.role).toEqual('assistant'); expect(message.content).toMatchObject({ type: 'tool-call', toolCallId, toolName: 'tool-1', args: {}, }); if (message.content && typeof message.content === 'object' && 'result' in message.content) { expect(message.content.result).toEqual('successful result!'); expect(message.completed).toBeDefined(); } }); test('appends final assistant message', () => { const message = conversationRegistry.get(conversationId).messages[3] as AssistantChat; expect(message.role).toEqual('assistant'); expect(message.content).toEqual('These are the results of you functions: huge success!'); }); 
test('setsUsage', async () => { const conversation = conversationRegistry.get(conversationId) as Conversation; expect(conversation?.usage?.completion_tokens).toEqual(7); expect(conversation?.usage?.prompt_tokens).toEqual(133); }); }); }); }); export function createTestModel({ stream = ai.simulateReadableStream({ chunks: [] }), rawCall = { rawPrompt: 'prompt', rawSettings: {} }, rawResponse = undefined, request = undefined, warnings, }: { stream?: ReadableStream; rawResponse?: { headers: Record }; rawCall?: { rawPrompt: string; rawSettings: Record }; request?: { body: string }; warnings?: LanguageModelV2CallWarning[]; } = {}): LanguageModelV3 { return new MockLanguageModelV3({ doStream: async () => ({ stream, rawCall, rawResponse, request, warnings }), }); } ================================================ FILE: packages/backend/src/managers/playground/aiSdk.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { streamText, stepCountIs } from 'ai'; import type { LanguageModel, ModelMessage, StepResult, StreamTextResult, StreamTextOnFinishCallback, TextStreamPart, ToolCallPart, ToolResultPart, ToolSet, } from 'ai'; import type { ModelOptions } from '@shared/models/IModelOptions'; import { type AssistantChat, type ErrorMessage, isAssistantToolCall, type Message, type ModelUsage, type PendingChat, type ToolCall, } from '@shared/models/IPlaygroundMessage'; import { isChatMessage } from '@shared/models/IPlaygroundMessage'; import type { ConversationRegistry } from '../../registries/ConversationRegistry'; export function toCoreMessage(...messages: Message[]): ModelMessage[] { const ret: ModelMessage[] = []; for (const message of messages) { if (isAssistantToolCall(message)) { const toolCall = message.content as ToolCall; ret.push({ role: 'assistant', content: [ { type: 'tool-call', toolCallId: toolCall.toolCallId, toolName: toolCall.toolName, input: toolCall.args, } as ToolCallPart, ] as ToolCallPart[], } as ModelMessage); if (toolCall.result) { ret.push({ role: 'tool', content: [ { type: 'tool-result', toolCallId: toolCall.toolCallId, toolName: toolCall.toolName, output: toolCall.result, } as ToolResultPart, ] as ToolResultPart[], } as ModelMessage); } } else if (isChatMessage(message)) { ret.push({ role: message.role, content: message.content ?? 
'', } as ModelMessage); } } return ret; } export class AiStreamProcessor { private stepStartTime: number | undefined; private currentMessageId: string | undefined; public readonly abortController: AbortController; constructor( private conversationId: string, private conversationRegistry: ConversationRegistry, ) { this.abortController = new AbortController(); this.abortController.signal.addEventListener('abort', this.onAbort); } private onStepFinish = (stepResult: StepResult): void => { this.conversationRegistry.setUsage(this.conversationId, { completion_tokens: stepResult.usage.outputTokens, prompt_tokens: stepResult.usage.inputTokens, } as ModelUsage); if (this.currentMessageId !== undefined) { this.conversationRegistry.completeMessage(this.conversationId, this.currentMessageId); } if (stepResult.toolCalls?.length > 0) { for (const toolCall of stepResult.toolCalls) { this.conversationRegistry.submit(this.conversationId, { id: this.conversationRegistry.getUniqueId(), role: 'assistant', timestamp: this.stepStartTime, content: { type: 'tool-call', toolCallId: toolCall.toolCallId, toolName: toolCall.toolName, args: toolCall.input, } as ToolCall, } as AssistantChat); } } if (stepResult.toolResults?.length > 0) { for (const toolResult of stepResult.toolResults) { this.conversationRegistry.toolResult( this.conversationId, toolResult.toolCallId, toolResult.output as string | object, ); } } this.currentMessageId = undefined; this.stepStartTime = Date.now(); }; private onChunk = ({ chunk }: { chunk: TextStreamPart }): void => { if (chunk.type !== 'text-delta') { return; } if (this.currentMessageId === undefined) { this.currentMessageId = this.conversationRegistry.getUniqueId(); this.conversationRegistry.submit(this.conversationId, { id: this.currentMessageId, role: 'assistant', timestamp: this.stepStartTime, choices: [], completed: undefined, } as PendingChat); } this.conversationRegistry.textDelta(this.conversationId, this.currentMessageId, chunk.text); }; private onError 
= (error: unknown): void => { if (error instanceof Object && 'error' in error) { error = error.error; } if (error instanceof Error) { error = error.message; } let errorMessage = String(error); if (errorMessage.endsWith('Please reduce the length of the messages or completion.')) { errorMessage += ' Note: You should start a new playground.'; } console.error('Something went wrong while creating model response', errorMessage); this.conversationRegistry.submit(this.conversationId, { id: this.conversationRegistry.getUniqueId(), timestamp: Date.now(), error: errorMessage, } as ErrorMessage); }; private onAbort = (): void => { // Ensure the last message is marked as complete to allow the user to resume the conversation if (this.currentMessageId !== undefined) { this.conversationRegistry.completeMessage(this.conversationId, this.currentMessageId); } }; private onFinish: StreamTextOnFinishCallback = stepResult => { this.conversationRegistry.setUsage(this.conversationId, { completion_tokens: stepResult.usage.outputTokens, prompt_tokens: stepResult.usage.inputTokens, } as ModelUsage); }; stream = (model: LanguageModel, tools?: TOOLS, options?: ModelOptions): StreamTextResult => { this.stepStartTime = Date.now(); return streamText({ model, tools, stopWhen: stepCountIs(10), temperature: options?.temperature, maxOutputTokens: (options?.max_tokens ?? -1) < 1 ? undefined : options?.max_tokens, topP: options?.top_p, abortSignal: this.abortController.signal, messages: toCoreMessage(...this.conversationRegistry.get(this.conversationId).messages), onStepFinish: this.onStepFinish, onError: this.onError, onChunk: this.onChunk, onFinish: this.onFinish, }); }; } ================================================ FILE: packages/backend/src/managers/playgroundV2Manager.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

// Unit tests for PlaygroundV2Manager: playground/conversation lifecycle,
// inference-server preconditions for submit, and frontend notifications.
import { expect, test, vi, beforeEach, afterEach, describe } from 'vitest';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { PlaygroundV2Manager } from './playgroundV2Manager';
import type { TelemetryLogger } from '@podman-desktop/api';
import type { InferenceServer } from '@shared/models/IInference';
import type { InferenceManager } from './inference/inferenceManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { TaskRegistry } from '../registries/TaskRegistry';
import type { Task, TaskState } from '@shared/models/ITask';
import type { ChatMessage, ErrorMessage } from '@shared/models/IPlaygroundMessage';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_CONVERSATIONS_UPDATE } from '@shared/Messages';
import type { LanguageModelV2CallWarning, LanguageModelV3, LanguageModelV3StreamPart } from '@ai-sdk/provider';
import { type McpServerManager } from './playground/McpServerManager';
import { MockLanguageModelV3 } from 'ai/test';
import { simulateReadableStream } from 'ai';

// The provider factory is mocked so tests can inject deterministic models.
vi.mock('@ai-sdk/openai-compatible', () => ({
  createOpenAICompatible: vi.fn(),
}));

const rpcExtensionMock = {
  fire: vi.fn(),
} as unknown as RpcExtension;

const inferenceManagerMock = {
  get: vi.fn(),
  getServers: vi.fn(),
  createInferenceServer: vi.fn(),
  startInferenceServer: vi.fn(),
} as unknown as InferenceManager;

const taskRegistryMock = {
  createTask: vi.fn(),
  getTasksByLabels: vi.fn(),
  updateTask: vi.fn(),
} as unknown as TaskRegistry;

const telemetryMock = {
  logUsage: vi.fn(),
  logError: vi.fn(),
} as unknown as TelemetryLogger;

const cancellationTokenRegistryMock = {
  createCancellationTokenSource: vi.fn(),
  delete: vi.fn(),
} as unknown as CancellationTokenRegistry;

let mcpServerManager: McpServerManager;

// Factory imported at runtime from the sibling spec (see beforeEach); builds a
// MockLanguageModelV3 backed by a provided readable stream.
let createTestModel: (options: {
  stream?: ReadableStream;
  rawResponse?: { headers: Record };
  rawCall?: { rawPrompt: string; rawSettings: Record };
  request?: { body: string };
  warnings?: LanguageModelV2CallWarning[];
}) => LanguageModelV3;

beforeEach(async () => {
  vi.resetAllMocks();
  vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
  // Fake timers let tests pin message timestamps with vi.setSystemTime.
  vi.useFakeTimers();
  mcpServerManager = {
    getMcpSettings: vi.fn(() => {}),
    toMcpClients: vi.fn(() => []),
  } as unknown as McpServerManager;
  createTestModel = (await import('./playground/aiSdk.spec')).createTestModel;
});

afterEach(async () => {
  vi.useRealTimers();
});

test('manager should be properly initialized', () => {
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  expect(manager.getConversations().length).toBe(0);
});

test('submit should throw an error if the server is stopped', async () => {
  // Server is running at creation time, then flips to stopped before submit.
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'running',
      models: [
        {
          id: 'model1',
        },
      ],
    } as unknown as InferenceServer,
  ]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground('playground 1', { id: 'model1' } as ModelInfo, 'tracking-1');
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'stopped',
      models: [
        {
          id: 'model1',
        },
      ],
    } as unknown as InferenceServer,
  ]);
  await expect(manager.submit(manager.getConversations()[0].id, 'dummyUserInput')).rejects.toThrowError(
    'Inference server is not running.',
  );
});

test('submit should throw an error if the server is unhealthy', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'running',
      health: {
        Status: 'unhealthy',
      },
      models: [
        {
          id: 'model1',
        },
      ],
    } as unknown as InferenceServer,
  ]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground('p1', { id: 'model1' } as ModelInfo, 'tracking-1');
  const playgroundId = manager.getConversations()[0].id;
  await expect(manager.submit(playgroundId, 'dummyUserInput')).rejects.toThrowError(
    'Inference server is not healthy, currently status: unhealthy.',
  );
});

test('create playground should create conversation.', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'running',
      health: {
        Status: 'healthy',
      },
      models: [
        {
          id: 'dummyModelId',
          file: {
            file: 'dummyModelFile',
          },
        },
      ],
    } as unknown as InferenceServer,
  ]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  expect(manager.getConversations().length).toBe(0);
  await manager.createPlayground('playground 1', { id: 'model-1' } as ModelInfo, 'tracking-1');
  const conversations = manager.getConversations();
  expect(conversations.length).toBe(1);
});

test('valid submit should create IPlaygroundMessage and notify the webview', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'running',
      health: {
        Status: 'healthy',
      },
      models: [
        {
          id: 'dummyModelId',
          file: {
            path: '.',
            file: 'dummyModelFile',
          },
        },
      ],
      connection: {
        port: 8888,
      },
      labels: [],
    } as unknown as InferenceServer,
  ]);
  // Model streams one text delta then a finish chunk carrying token usage.
  // @ts-expect-error - Mock return type for testing
  vi.mocked(createOpenAICompatible).mockReturnValue(() =>
    createTestModel({
      stream: simulateReadableStream({
        chunks: [
          { type: 'text-delta', id: 'id-1', delta: 'The message from the model' },
          {
            type: 'finish',
            finishReason: { unified: 'stop', raw: undefined },
            usage: {
              outputTokens: { total: 133, text: undefined, reasoning: undefined },
              inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
            },
          },
        ],
      }),
    }),
  );
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
  const date = new Date(2000, 1, 1, 13);
  vi.setSystemTime(date);
  const playgrounds = manager.getConversations();
  await manager.submit(playgrounds[0].id, 'dummyUserInput');
  // Wait for assistant message to be completed
  await vi.waitFor(() => {
    expect(manager.getConversations()[0].usage?.completion_tokens).toBeGreaterThan(0);
  });
  const conversations = manager.getConversations();
  expect(conversations.length).toBe(1);
  expect(conversations[0].messages.length).toBe(2);
  expect(conversations[0].messages[0]).toStrictEqual({
    content: 'dummyUserInput',
    id: expect.anything(),
    options: undefined,
    role: 'user',
    timestamp: expect.any(Number),
  });
  expect(conversations[0].messages[1]).toStrictEqual({
    choices: undefined,
    completed: expect.any(Number),
    content: 'The message from the model',
    id: expect.anything(),
    role: 'assistant',
    timestamp: expect.any(Number),
  });
  expect(conversations[0].usage).toStrictEqual({
    completion_tokens: 133,
    prompt_tokens: 7,
  });
  expect(rpcExtensionMock.fire).toHaveBeenLastCalledWith(MSG_CONVERSATIONS_UPDATE, conversations);
});

test('error', async () => {
  // A throwing doStream must surface as an ErrorMessage in the conversation.
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      status: 'running',
      health: {
        Status: 'healthy',
      },
      models: [
        {
          id: 'dummyModelId',
          file: {
            path: '.',
            file: 'dummyModelFile',
          },
        },
      ],
      connection: {
        port: 8888,
      },
      labels: [],
    } as unknown as InferenceServer,
  ]);
  const doStream: LanguageModelV3['doStream'] = async () => {
    throw new Error('Please reduce the length of the messages or completion.');
  };
  vi.mocked(createOpenAICompatible).mockReturnValue(
    // @ts-expect-error MockLanguageModelV2 test mock
    // eslint-disable-next-line sonarjs/new-operator-misuse
    () =>
      new (MockLanguageModelV3 as unknown as new (options: {
        doStream: LanguageModelV3['doStream'];
      }) => LanguageModelV3)({ doStream }),
  );
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
  const date = new Date(2000, 1, 1, 13);
  vi.setSystemTime(date);
  const playgrounds = manager.getConversations();
  await manager.submit(playgrounds[0].id, 'dummyUserInput');
  // Wait for error message
  await vi.waitFor(() => {
    expect((manager.getConversations()[0].messages[1] as ErrorMessage).error).toBeDefined();
  });
  const conversations = manager.getConversations();
  expect(conversations.length).toBe(1);
  expect(conversations[0].messages.length).toBe(2);
  expect(conversations[0].messages[0]).toStrictEqual({
    content: 'dummyUserInput',
    id: expect.anything(),
    options: undefined,
    role: 'user',
    timestamp: expect.any(Number),
  });
  expect(conversations[0].messages[1]).toStrictEqual({
    error: 'Please reduce the length of the messages or completion. Note: You should start a new playground.',
    id: expect.anything(),
    timestamp: expect.any(Number),
  });
  expect(rpcExtensionMock.fire).toHaveBeenLastCalledWith(MSG_CONVERSATIONS_UPDATE, conversations);
});

test('creating a new playground should send new playground to frontend', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_CONVERSATIONS_UPDATE, [
    {
      id: expect.anything(),
      modelId: 'model-1',
      name: 'a name',
      messages: [],
      usage: {
        completion_tokens: 0,
        prompt_tokens: 0,
      },
    },
  ]);
});

test('creating a new playground with no name should send new playground to frontend with generated name', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    '',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_CONVERSATIONS_UPDATE, [
    {
      id: expect.anything(),
      modelId: 'model-1',
      name: 'playground 1',
      messages: [],
      usage: {
        completion_tokens: 0,
        prompt_tokens: 0,
      },
    },
  ]);
});

test('creating a new playground with no model served should start an inference server', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
  const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  expect(createInferenceServerMock).toHaveBeenCalledWith({
    gpuLayers: expect.any(Number),
    image: undefined,
    providerId: undefined,
    inferenceProvider: undefined,
    labels: {
      trackingId: 'tracking-1',
    },
    modelsInfo: [
      {
        id: 'model-1',
        name: 'Model 1',
      },
    ],
    port: expect.anything(),
  });
});

test('creating a new playground with the model already served should not start an inference server', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      models: [
        {
          id: 'model-1',
        },
      ],
    },
  ] as InferenceServer[]);
  const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  expect(createInferenceServerMock).not.toHaveBeenCalled();
});

test('creating a new playground with the model server stopped should start the inference server', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
    {
      models: [
        {
          id: 'model-1',
        },
      ],
      status: 'stopped',
      container: {
        containerId: 'container-1',
      },
    },
  ] as InferenceServer[]);
  const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
  const startInferenceServerMock = vi.mocked(inferenceManagerMock.startInferenceServer);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  expect(createInferenceServerMock).not.toHaveBeenCalled();
  expect(startInferenceServerMock).toHaveBeenCalledWith('container-1');
});

test('delete conversation should delete the conversation',
async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  expect(manager.getConversations().length).toBe(0);
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  const conversations = manager.getConversations();
  expect(conversations.length).toBe(1);
  manager.deleteConversation(conversations[0].id);
  expect(manager.getConversations().length).toBe(0);
  expect(rpcExtensionMock.fire).toHaveBeenCalled();
});

test('creating a new playground with an existing name should fail', async () => {
  vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  await manager.createPlayground(
    'a name',
    {
      id: 'model-1',
      name: 'Model 1',
    } as unknown as ModelInfo,
    'tracking-1',
  );
  // Same name, different model: must be rejected (names are unique).
  await expect(
    manager.createPlayground(
      'a name',
      {
        id: 'model-2',
        name: 'Model 2',
      } as unknown as ModelInfo,
      'tracking-2',
    ),
  ).rejects.toThrowError('a playground with the name a name already exists');
});

test('requestCreatePlayground should call createPlayground and createTask, then updateTask', async () => {
  // Real timers: the task is updated asynchronously after createPlayground resolves.
  vi.useRealTimers();
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  const createTaskMock = vi.mocked(taskRegistryMock).createTask;
  const updateTaskMock = vi.mocked(taskRegistryMock).updateTask;
  createTaskMock.mockImplementation((_name: string, _state: TaskState, labels?: { [id: string]: string }) => {
    return {
      labels,
    } as Task;
  });
  const createPlaygroundSpy = vi.spyOn(manager, 'createPlayground').mockResolvedValue('playground-1');
  const id = await manager.requestCreatePlayground('a name', { id: 'model-1' } as ModelInfo);
  expect(createPlaygroundSpy).toHaveBeenCalledWith('a name', { id: 'model-1' } as ModelInfo, expect.any(String));
  expect(createTaskMock).toHaveBeenCalledWith('Creating Playground environment', 'loading', {
    trackingId: id,
  });
  // Let the fire-and-forget promise chain inside requestCreatePlayground settle.
  await new Promise(resolve => setTimeout(resolve, 0));
  expect(updateTaskMock).toHaveBeenCalledWith({
    labels: {
      trackingId: id,
      playgroundId: 'playground-1',
    },
    state: 'success',
  });
});

test('requestCreatePlayground should call createPlayground and createTask, then updateTask when createPlayground fails', async () => {
  vi.useRealTimers();
  const manager = new PlaygroundV2Manager(
    rpcExtensionMock,
    inferenceManagerMock,
    taskRegistryMock,
    telemetryMock,
    cancellationTokenRegistryMock,
    mcpServerManager,
  );
  const createTaskMock = vi.mocked(taskRegistryMock).createTask;
  const updateTaskMock = vi.mocked(taskRegistryMock).updateTask;
  const getTasksByLabelsMock = vi.mocked(taskRegistryMock).getTasksByLabels;
  createTaskMock.mockImplementation((_name: string, _state: TaskState, labels?: { [id: string]: string }) => {
    return {
      labels,
    } as Task;
  });
  const createPlaygroundSpy = vi.spyOn(manager, 'createPlayground').mockRejectedValue(new Error('an error'));
  const id = await manager.requestCreatePlayground('a name', { id: 'model-1' } as ModelInfo);
  expect(createPlaygroundSpy).toHaveBeenCalledWith('a name', { id: 'model-1' } as ModelInfo, expect.any(String));
  expect(createTaskMock).toHaveBeenCalledWith('Creating Playground environment', 'loading', {
    trackingId: id,
  });
  getTasksByLabelsMock.mockReturnValue([
    {
      labels: {
        trackingId: id,
      },
    } as unknown as Task,
  ]);
  await new Promise(resolve => setTimeout(resolve, 0));
  expect(updateTaskMock).toHaveBeenCalledWith({
    error: 'Something went wrong while trying to create a playground environment Error: an error.',
    labels: {
      trackingId: id,
    },
    state: 'error',
  });
});

describe('system prompt', () => {
  test('set system prompt on non existing conversation should throw an error', async () => {
    vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
      {
        status: 'running',
        models: [
          {
            id: 'model1',
          },
        ],
      } as unknown as InferenceServer,
    ]);
    const manager = new PlaygroundV2Manager(
      rpcExtensionMock,
      inferenceManagerMock,
      taskRegistryMock,
      telemetryMock,
      cancellationTokenRegistryMock,
      mcpServerManager,
    );
    expect(() => {
      manager.setSystemPrompt('invalid', 'content');
    }).toThrowError('conversation with id invalid does not exist.');
  });
  test('set system prompt should throw an error if user already submit message', async () => {
    vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
      {
        status: 'running',
        health: {
          Status: 'healthy',
        },
        models: [
          {
            id: 'dummyModelId',
            file: {
              path: '.',
              file: 'dummyModelFile',
            },
          },
        ],
        connection: {
          port: 8888,
        },
        labels: [],
      } as unknown as InferenceServer,
    ]);
    // @ts-expect-error - Mock return type for testing
    vi.mocked(createOpenAICompatible).mockReturnValue(() =>
      createTestModel({
        stream: simulateReadableStream({
          chunks: [
            { type: 'text-delta', id: 'id-1', delta: 'The message from the model' },
            {
              type: 'finish',
              finishReason: { unified: 'stop', raw: undefined },
              usage: {
                outputTokens: { total: 133, text: undefined, reasoning: undefined },
                inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
              },
            },
          ],
        }),
      }),
    );
    const manager = new PlaygroundV2Manager(
      rpcExtensionMock,
      inferenceManagerMock,
      taskRegistryMock,
      telemetryMock,
      cancellationTokenRegistryMock,
      mcpServerManager,
    );
    await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
    const date = new Date(2000, 1, 1, 13);
    vi.setSystemTime(date);
    const conversations = manager.getConversations();
    await manager.submit(conversations[0].id, 'dummyUserInput');
    // Wait for assistant message to be completed
    await vi.waitFor(() => {
      expect((manager.getConversations()[0].messages[1] as ChatMessage).content).toBeDefined();
    });
    expect(() => {
      manager.setSystemPrompt(manager.getConversations()[0].id, 'newSystemPrompt');
    }).toThrowError('Cannot change system prompt on started conversation.');
  });
});

================================================
FILE: packages/backend/src/managers/playgroundV2Manager.ts
================================================

/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import type { Disposable, TelemetryLogger } from '@podman-desktop/api';
import type { InferenceManager } from './inference/inferenceManager';
import type { ModelOptions } from '@shared/models/IModelOptions';
import { ConversationRegistry } from '../registries/ConversationRegistry';
import type { Conversation, SystemPrompt, UserChat } from '@shared/models/IPlaygroundMessage';
import { isSystemPrompt } from '@shared/models/IPlaygroundMessage';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { withDefaultConfiguration } from '../utils/inferenceUtils';
import { getRandomString } from '../utils/randomUtils';
import type { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import { getHash } from '../utils/sha';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import {
createOpenAICompatible } from '@ai-sdk/openai-compatible'; import { AiStreamProcessor } from './playground/aiSdk'; import { type McpServerManager } from './playground/McpServerManager'; import type { ToolSet } from 'ai'; import { simulateStreamingMiddleware, wrapLanguageModel } from 'ai'; export class PlaygroundV2Manager implements Disposable { readonly #conversationRegistry: ConversationRegistry; constructor( rpcExtension: RpcExtension, private inferenceManager: InferenceManager, private taskRegistry: TaskRegistry, private telemetry: TelemetryLogger, private cancellationTokenRegistry: CancellationTokenRegistry, private mcpServerManager: McpServerManager, ) { this.#conversationRegistry = new ConversationRegistry(rpcExtension); } deleteConversation(conversationId: string): void { const conversation = this.#conversationRegistry.get(conversationId); this.telemetry.logUsage('playground.delete', { totalMessages: conversation.messages.length, modelId: getHash(conversation.modelId), }); this.#conversationRegistry.deleteConversation(conversationId); } async requestCreatePlayground(name: string, model: ModelInfo): Promise { const trackingId: string = getRandomString(); const task = this.taskRegistry.createTask('Creating Playground environment', 'loading', { trackingId: trackingId, }); const telemetry: Record = { hasName: !!name, modelId: getHash(model.id), }; this.createPlayground(name, model, trackingId) .then((playgroundId: string) => { this.taskRegistry.updateTask({ ...task, state: 'success', labels: { ...task.labels, playgroundId, }, }); }) .catch((err: unknown) => { telemetry['errorMessage'] = `${String(err)}`; const tasks = this.taskRegistry.getTasksByLabels({ trackingId: trackingId, }); // Filter the one no in loading state tasks .filter(t => t.state === 'loading' && t.id !== task.id) .forEach(t => { this.taskRegistry.updateTask({ ...t, state: 'error', }); }); // Update the main task this.taskRegistry.updateTask({ ...task, state: 'error', error: `Something went wrong 
while trying to create a playground environment ${String(err)}.`, }); }) .finally(() => { this.telemetry.logUsage('playground.create', telemetry); }); return trackingId; } async createPlayground(name: string, model: ModelInfo, trackingId: string): Promise { if (!name) { name = this.getFreeName(); } if (!this.isNameFree(name)) { throw new Error(`a playground with the name ${name} already exists`); } // Create conversation const conversationId = this.#conversationRegistry.createConversation(name, model.id); // create/start inference server if necessary const servers = this.inferenceManager.getServers(); const server = servers.find(s => s.models.map(mi => mi.id).includes(model.id)); if (!server) { await this.inferenceManager.createInferenceServer( await withDefaultConfiguration({ modelsInfo: [model], labels: { trackingId: trackingId, }, }), ); } else if (server.status === 'stopped') { await this.inferenceManager.startInferenceServer(server.container.containerId); } return conversationId; } /** * Add a system prompt to an existing conversation. * @param conversationId the conversation to append the system prompt to. * @param content the content of the system prompt */ private submitSystemPrompt(conversationId: string, content: string): void { this.#conversationRegistry.submit(conversationId, { content: content, role: 'system', id: this.#conversationRegistry.getUniqueId(), timestamp: Date.now(), } as SystemPrompt); this.telemetry.logUsage('playground.system-prompt.create', { modelId: getHash(this.#conversationRegistry.get(conversationId).modelId), }); } /** * Given a conversation, update the system prompt. 
* If none exists, it will create one, otherwise it will replace the content with the new one * @param conversationId the conversation id to set the system id * @param content the new system prompt to use */ setSystemPrompt(conversationId: string, content: string | undefined): void { const conversation = this.#conversationRegistry.get(conversationId); if (content === undefined || content.length === 0) { this.#conversationRegistry.removeMessage(conversationId, conversation.messages[0].id); this.telemetry.logUsage('playground.system-prompt.delete', { modelId: getHash(conversation.modelId), }); return; } if (conversation.messages.length === 0) { this.submitSystemPrompt(conversationId, content); } else if (conversation.messages.length === 1 && isSystemPrompt(conversation.messages[0])) { this.#conversationRegistry.update(conversationId, conversation.messages[0].id, { content, }); this.telemetry.logUsage('playground.system-prompt.update', { modelId: getHash(conversation.modelId), }); } else { throw new Error('Cannot change system prompt on started conversation.'); } } /** * @param conversationId * @param userInput the user input * @param options the model configuration */ async submit(conversationId: string, userInput: string, options?: ModelOptions): Promise { const conversation = this.#conversationRegistry.get(conversationId); const servers = this.inferenceManager.getServers(); const server = servers.find(s => s.models.map(mi => mi.id).includes(conversation.modelId)); if (server === undefined) throw new Error('Inference server not found.'); if (server.status !== 'running') throw new Error('Inference server is not running.'); if (server.health?.Status !== 'healthy') throw new Error(`Inference server is not healthy, currently status: ${server.health?.Status ?? 
'unknown'}.`); const modelInfo = server.models.find(model => model.id === conversation.modelId); if (modelInfo === undefined) throw new Error( `modelId '${conversation.modelId}' is not available on the inference server, valid model ids are: ${server.models.map(model => model.id).join(', ')}.`, ); this.#conversationRegistry.submit(conversation.id, { content: userInput, options: options, role: 'user', id: this.#conversationRegistry.getUniqueId(), timestamp: Date.now(), } as UserChat); if (!modelInfo.file?.path) throw new Error('model info has undefined file.'); const telemetry: Record = { conversationId: conversationId, ...options, promptLength: userInput.length, modelId: getHash(modelInfo.id), }; const streamProcessor = new AiStreamProcessor(conversationId, this.#conversationRegistry); const cancelTokenId = this.cancellationTokenRegistry.createCancellationTokenSource(() => { streamProcessor.abortController.abort('cancel'); }); const tools: ToolSet = {}; const mcpClients = await this.mcpServerManager.toMcpClients(); for (const client of mcpClients) { const clientTools = await client.tools(); for (const entry of Object.entries(clientTools)) { tools[entry[0]] = entry[1]; } } const openAiClient = createOpenAICompatible({ name: modelInfo.name, baseURL: server.labels['api'] ?? 
`http://localhost:${server.connection.port}/v1`, }); let model = openAiClient(modelInfo.name); // Tool calling in OpenAI doesn't support streaming yet if (Object.keys(tools).length > 0) { model = wrapLanguageModel({ model, middleware: simulateStreamingMiddleware() }); } const start = Date.now(); const finalBlock = (): void => { this.telemetry.logUsage('playground.submit', telemetry); this.cancellationTokenRegistry.delete(cancelTokenId); Promise.all(mcpClients.map(client => client.close())).catch((e: unknown) => console.error(`Error closing MCP client`, e), ); }; streamProcessor .stream(model, tools, options) .consumeStream() .then( () => { this.telemetry.logUsage('playground.message.complete', { duration: Date.now() - start, modelId: getHash(conversation.modelId), }); finalBlock(); }, (err: unknown) => { console.error('Something went wrong while processing stream', err); finalBlock(); }, ); return cancelTokenId; } getConversations(): Conversation[] { return this.#conversationRegistry.getAll(); } private getFreeName(): string { const names = new Set(this.getConversations().map(c => c.name)); let i = 0; let name: string; do { name = `playground ${++i}`; } while (names.has(name)); return name; } private isNameFree(name: string): boolean { return !this.getConversations().some(c => c.name === name); } dispose(): void { this.#conversationRegistry.dispose(); } } ================================================ FILE: packages/backend/src/managers/podmanConnection.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import { PodmanConnection } from './podmanConnection'; import type { ContainerProviderConnection, Extension, ProviderConnectionStatus, ProviderContainerConnection, ProviderEvent, RegisterContainerConnectionEvent, RunResult, UnregisterContainerConnectionEvent, UpdateContainerConnectionEvent, } from '@podman-desktop/api'; import { containerEngine, extensions, process, provider, EventEmitter, env } from '@podman-desktop/api'; import { VMType } from '@shared/models/IPodman'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { getPodmanCli, getPodmanMachineName } from '../utils/podman'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_PODMAN_CONNECTION_UPDATE } from '@shared/Messages'; const rpcExtensionMock = { fire: vi.fn(), } as unknown as RpcExtension; vi.mock('@podman-desktop/api', async () => { return { EventEmitter: vi.fn(), provider: { onDidUnregisterContainerConnection: vi.fn(), onDidRegisterContainerConnection: vi.fn(), onDidUpdateContainerConnection: vi.fn(), onDidUpdateProvider: vi.fn(), getContainerConnections: vi.fn(), }, process: { exec: vi.fn(), }, extensions: { getExtension: vi.fn(), }, containerEngine: { listInfos: vi.fn(), }, env: { isLinux: vi.fn(), }, navigation: {}, }; }); vi.mock('../utils/podman', () => { return { getPodmanCli: vi.fn(), getPodmanMachineName: vi.fn(), MIN_CPUS_VALUE: 4, }; }); beforeEach(() => { 
vi.resetAllMocks(); vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true); vi.mocked(provider.getContainerConnections).mockReturnValue([]); vi.mocked(getPodmanCli).mockReturnValue('podman-executable'); vi.mocked(getPodmanMachineName).mockImplementation(connection => connection.name); const listeners: ((value: unknown) => void)[] = []; vi.mocked(EventEmitter).mockReturnValue({ event: vi.fn().mockImplementation(callback => { listeners.push(callback); }), fire: vi.fn().mockImplementation((content: unknown) => { listeners.forEach(listener => listener(content)); }), } as unknown as EventEmitter); }); const providerContainerConnectionMock: ProviderContainerConnection = { connection: { type: 'podman', status: () => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }; describe('execute', () => { test('execute should get the podman extension from api', async () => { vi.mocked(extensions.getExtension).mockReturnValue(undefined); const manager = new PodmanConnection(rpcExtensionMock); await manager.execute(providerContainerConnectionMock.connection, ['ls']); expect(extensions.getExtension).toHaveBeenCalledWith('podman-desktop.podman'); }); test('execute should call getPodmanCli if extension not available', async () => { vi.mocked(extensions.getExtension).mockReturnValue(undefined); const manager = new PodmanConnection(rpcExtensionMock); await manager.execute(providerContainerConnectionMock.connection, ['ls']); expect(getPodmanCli).toHaveBeenCalledOnce(); expect(process.exec).toHaveBeenCalledWith('podman-executable', ['ls'], undefined); }); test('options should be propagated to process execution when provided', async () => { vi.mocked(extensions.getExtension).mockReturnValue(undefined); const manager = new PodmanConnection(rpcExtensionMock); await manager.execute(providerContainerConnectionMock.connection, ['ls'], { isAdmin: true, }); expect(getPodmanCli).toHaveBeenCalledOnce(); 
expect(process.exec).toHaveBeenCalledWith('podman-executable', ['ls'], { isAdmin: true, }); }); test('execute should use extension exec if available', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]); const podmanAPI = { exec: vi.fn(), }; vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension); const manager = new PodmanConnection(rpcExtensionMock); await manager.execute(providerContainerConnectionMock.connection, ['ls']); expect(getPodmanCli).not.toHaveBeenCalledOnce(); expect(podmanAPI.exec).toHaveBeenCalledWith(['ls'], { connection: providerContainerConnectionMock, }); }); test('an error should be throw if the provided container connection do not exists', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([]); const podmanAPI = { exec: vi.fn(), }; vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension); const manager = new PodmanConnection(rpcExtensionMock); await expect(async () => { await manager.execute(providerContainerConnectionMock.connection, ['ls'], { isAdmin: true, }); }).rejects.toThrowError('cannot find podman provider with connection name Podman Machine'); }); test('execute should propagate options to extension exec if available', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]); const podmanAPI = { exec: vi.fn(), }; vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension); const manager = new PodmanConnection(rpcExtensionMock); await manager.execute(providerContainerConnectionMock.connection, ['ls'], { isAdmin: true, }); expect(getPodmanCli).not.toHaveBeenCalledOnce(); expect(podmanAPI.exec).toHaveBeenCalledWith(['ls'], { isAdmin: true, connection: providerContainerConnectionMock, }); }); }); describe('executeSSH', () => { test('executeSSH should call getPodmanCli if extension not 
available', async () => { vi.mocked(extensions.getExtension).mockReturnValue(undefined); const manager = new PodmanConnection(rpcExtensionMock); await manager.executeSSH(providerContainerConnectionMock.connection, ['ls']); expect(getPodmanCli).toHaveBeenCalledOnce(); expect(process.exec).toHaveBeenCalledWith( 'podman-executable', ['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'], undefined, ); }); test('executeSSH should use extension exec if available', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]); const podmanAPI = { exec: vi.fn(), }; vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension); const manager = new PodmanConnection(rpcExtensionMock); await manager.executeSSH(providerContainerConnectionMock.connection, ['ls']); expect(getPodmanCli).not.toHaveBeenCalledOnce(); expect(podmanAPI.exec).toHaveBeenCalledWith( ['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'], { connection: providerContainerConnectionMock, }, ); }); test('executeSSH should propagate options to extension exec if available', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]); const podmanAPI = { exec: vi.fn(), }; vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension); const manager = new PodmanConnection(rpcExtensionMock); await manager.executeSSH(providerContainerConnectionMock.connection, ['ls'], { isAdmin: true, }); expect(getPodmanCli).not.toHaveBeenCalledOnce(); expect(podmanAPI.exec).toHaveBeenCalledWith( ['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'], { isAdmin: true, connection: providerContainerConnectionMock, }, ); }); }); describe('podman connection initialization', () => { test('init should notify publisher', () => { const manager = new PodmanConnection(rpcExtensionMock); manager.init(); 
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []); }); test('init should register all provider events', () => { const manager = new PodmanConnection(rpcExtensionMock); manager.init(); expect(provider.onDidUnregisterContainerConnection).toHaveBeenCalledWith(expect.any(Function)); expect(provider.onDidRegisterContainerConnection).toHaveBeenCalledWith(expect.any(Function)); expect(provider.onDidUpdateContainerConnection).toHaveBeenCalledWith(expect.any(Function)); expect(provider.onDidUpdateProvider).toHaveBeenCalledWith(expect.any(Function)); }); test('init should fetch all container connections', () => { const statusMock = vi.fn().mockReturnValue('started'); const providerContainerConnection: ProviderContainerConnection = { connection: { type: 'podman', status: statusMock, name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }; vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnection]); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); expect(manager.getContainerProviderConnectionInfo()).toStrictEqual([ { name: 'Podman Machine', providerId: 'podman', status: 'started', type: 'podman', vmType: VMType.UNKNOWN, }, ]); expect(manager.getContainerProviderConnections()).toStrictEqual([providerContainerConnection.connection]); expect(statusMock).toHaveBeenCalled(); }); }); async function getListeners(): Promise<{ onDidUnregisterContainerConnection: (e: UnregisterContainerConnectionEvent) => void; onDidRegisterContainerConnection: (e: RegisterContainerConnectionEvent) => void; onDidUpdateContainerConnection: (e: UpdateContainerConnectionEvent) => void; onDidUpdateProvider: (e: ProviderEvent) => void; podmanConnection: PodmanConnection; }> { const onDidUnregisterContainerConnectionPromise: Promise<(e: UnregisterContainerConnectionEvent) => void> = new Promise(resolve => { vi.mocked(provider.onDidUnregisterContainerConnection).mockImplementation( 
(fn: (e: UnregisterContainerConnectionEvent) => void) => { resolve(fn); return { dispose: vi.fn(), }; }, ); }); const onDidRegisterContainerConnectionPromise: Promise<(e: RegisterContainerConnectionEvent) => void> = new Promise( resolve => { vi.mocked(provider.onDidRegisterContainerConnection).mockImplementation( (fn: (e: RegisterContainerConnectionEvent) => void) => { resolve(fn); return { dispose: vi.fn(), }; }, ); }, ); const onDidUpdateContainerConnectionPromise: Promise<(e: UpdateContainerConnectionEvent) => void> = new Promise( resolve => { vi.mocked(provider.onDidUpdateContainerConnection).mockImplementation( (fn: (e: UpdateContainerConnectionEvent) => void) => { resolve(fn); return { dispose: vi.fn(), }; }, ); }, ); const onDidUpdateProviderPromise: Promise<(e: ProviderEvent) => void> = new Promise(resolve => { vi.mocked(provider.onDidUpdateProvider).mockImplementation((fn: (e: ProviderEvent) => void) => { resolve(fn); return { dispose: vi.fn(), }; }); }); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); return { onDidUnregisterContainerConnection: await onDidUnregisterContainerConnectionPromise, onDidRegisterContainerConnection: await onDidRegisterContainerConnectionPromise, onDidUpdateContainerConnection: await onDidUpdateContainerConnectionPromise, onDidUpdateProvider: await onDidUpdateProviderPromise, podmanConnection: manager, }; } describe('container connection event', () => { test('onDidUnregisterContainerConnection should refresh and notify webview', async () => { const { onDidUnregisterContainerConnection } = await getListeners(); // simulate onDidUnregisterContainerConnection event onDidUnregisterContainerConnection({ providerId: 'podman' }); // ensure the webview has been notified await vi.waitFor(() => { expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []); }); }); test('onDidUnregisterContainerConnection should fire PodmanConnectionEvent', async () => { const { 
onDidUnregisterContainerConnection, podmanConnection } = await getListeners(); // register event listener const onPodmanConnectionEventListenerMock = vi.fn(); podmanConnection.onPodmanConnectionEvent(onPodmanConnectionEventListenerMock); // simulate onDidUnregisterContainerConnection event onDidUnregisterContainerConnection({ providerId: 'podman' }); expect(onPodmanConnectionEventListenerMock).toHaveBeenCalledWith({ status: 'unregister', }); }); test('onDidRegisterContainerConnection should notify webview', async () => { const { onDidRegisterContainerConnection, podmanConnection } = await getListeners(); // simulate a onDidRegisterContainerConnection event onDidRegisterContainerConnection({ providerId: 'podman', connection: { type: 'podman', name: 'Podman Machine', status: () => 'started', endpoint: { socketPath: './socket-path', }, }, }); // ensure the webview has been notified await vi.waitFor(() => { expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, [ { providerId: 'podman', name: 'Podman Machine', status: 'started', type: 'podman', vmType: VMType.UNKNOWN, }, ]); }); // ensure it has properly been added expect(podmanConnection.getContainerProviderConnectionInfo().length).toBe(1); }); test('onDidRegisterContainerConnection should fire PodmanConnectionEvent', async () => { const { onDidRegisterContainerConnection, podmanConnection } = await getListeners(); // register event listener const onPodmanConnectionEventListenerMock = vi.fn(); podmanConnection.onPodmanConnectionEvent(onPodmanConnectionEventListenerMock); // simulate a onDidRegisterContainerConnection event onDidRegisterContainerConnection({ providerId: 'podman', connection: { type: 'podman', name: 'Podman Machine', status: () => 'started', endpoint: { socketPath: './socket-path', }, }, }); expect(onPodmanConnectionEventListenerMock).toHaveBeenCalledWith({ status: 'register', }); }); test('onDidUpdateProvider should refresh and notify webview', async () => { const { 
onDidUpdateProvider } = await getListeners(); // simulate onDidUnregisterContainerConnection event onDidUpdateProvider({ name: 'podman', status: 'unknown', id: 'podman' }); // ensure the webview has been notified await vi.waitFor(() => { expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []); }); }); test('onDidUpdateContainerConnection should refresh and notify webview', async () => { const { onDidUpdateContainerConnection } = await getListeners(); // simulate onDidUnregisterContainerConnection event onDidUpdateContainerConnection({ status: 'started', providerId: 'podman', connection: { type: 'podman', name: 'Podman Machine', status: () => 'started', endpoint: { socketPath: './socket-path', }, }, }); // ensure the webview has been notified await vi.waitFor(() => { expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []); }); }); }); describe('getVMType', () => { test('empty response should throw an error', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: '[]', } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); await expect(() => manager.getVMType('machine')).rejects.toThrowError( 'podman machine list provided an empty array', ); }); test('empty array should return UNKNOWN when no name is provided', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: '[]', } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); expect(await manager.getVMType()).toBe(VMType.UNKNOWN); }); test('malformed response should throw an error', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: '{}', } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); await expect(() => manager.getVMType()).rejects.toThrowError('podman machine list provided a malformed response'); }); test('array with length greater than one require name', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: '[{}, 
{}]', } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); await expect(() => manager.getVMType()).rejects.toThrowError( 'name need to be provided when more than one podman machine is configured.', ); }); test('argument name should be used to filter the machine', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: JSON.stringify([ { Name: 'machine-1', VMType: VMType.QEMU, }, { Name: 'machine-2', VMType: VMType.APPLEHV, }, ]), } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); expect(await manager.getVMType('machine-2')).toBe(VMType.APPLEHV); }); test('invalid name should throw an error', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: JSON.stringify([ { Name: 'machine-1', }, { Name: 'machine-2', }, ]), } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); await expect(() => manager.getVMType('potatoes')).rejects.toThrowError( 'cannot find matching podman machine with name potatoes', ); }); test('single machine should return its VMType', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: JSON.stringify([ { Name: 'machine-1', VMType: VMType.WSL, }, ]), } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); expect(await manager.getVMType()).toBe(VMType.WSL); }); test('unknown string should return UNKNOWN', async () => { vi.mocked(process.exec).mockResolvedValue({ stdout: JSON.stringify([ { Name: 'machine-1', VMType: 'fake-content', }, ]), } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); expect(await manager.getVMType()).toBe(VMType.UNKNOWN); }); test.each(Object.values(VMType) as string[])('%s type should be the expected result', async vmtype => { vi.mocked(process.exec).mockResolvedValue({ stdout: JSON.stringify([ { VMType: vmtype, }, ]), } as unknown as RunResult); const manager = new PodmanConnection(rpcExtensionMock); expect(await 
manager.getVMType()).toBe(vmtype); }); }); const modelMock: ModelInfo & { memory: number } = { name: 'dummy', memory: 10, description: '', id: 'dummy-id', properties: {}, }; describe('checkContainerConnectionStatusAndResources', () => { test('return native on Linux', async () => { const manager = new PodmanConnection(rpcExtensionMock); vi.mocked(env).isLinux = true; const result = await manager.checkContainerConnectionStatusAndResources({ model: modelMock, context: 'inference', }); expect(result).toStrictEqual({ status: 'native', canRedirect: expect.any(Boolean), }); }); test('return noMachineInfo if there is no running podman connection', async () => { const manager = new PodmanConnection(rpcExtensionMock); vi.mocked(env).isLinux = false; const result = await manager.checkContainerConnectionStatusAndResources({ model: modelMock, context: 'inference', }); expect(result).toStrictEqual({ status: 'no-machine', canRedirect: expect.any(Boolean), }); }); test('return noMachineInfo if we are not able to retrieve any info about the podman connection', async () => { const manager = new PodmanConnection(rpcExtensionMock); vi.mocked(env).isLinux = false; vi.mocked(containerEngine.listInfos).mockResolvedValue([]); const result = await manager.checkContainerConnectionStatusAndResources({ model: modelMock, context: 'inference', }); expect(result).toStrictEqual({ status: 'no-machine', canRedirect: expect.any(Boolean), }); }); test('return lowResourceMachineInfo if the podman connection has not enough cpus', async () => { const manager = new PodmanConnection(rpcExtensionMock); vi.mocked(env).isLinux = false; vi.mocked(provider.getContainerConnections).mockReturnValue([ { connection: { type: 'podman', status: (): ProviderConnectionStatus => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }, ]); vi.mocked(containerEngine.listInfos).mockResolvedValue([ { engineId: 'engineId', engineName: 'enginerName', engineType: 'podman', 
cpus: 3, memory: 20, memoryUsed: 0, }, ]); manager.init(); const result = await manager.checkContainerConnectionStatusAndResources({ model: modelMock, context: 'inference', }); expect(result).toStrictEqual({ status: 'low-resources', canRedirect: expect.any(Boolean), name: 'Podman Machine', canEdit: false, cpus: 3, memoryIdle: 20, cpusExpected: 4, memoryExpected: 11, }); }); test('return runningMachineInfo if the podman connection has enough resources', async () => { const manager = new PodmanConnection(rpcExtensionMock); vi.mocked(env).isLinux = false; vi.mocked(provider.getContainerConnections).mockReturnValue([ { connection: { type: 'podman', status: (): ProviderConnectionStatus => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }, ]); vi.mocked(containerEngine.listInfos).mockResolvedValue([ { engineId: 'engineId', engineName: 'enginerName', engineType: 'podman', cpus: 12, memory: 20, memoryUsed: 0, }, ]); manager.init(); const result = await manager.checkContainerConnectionStatusAndResources({ model: modelMock, context: 'inference', }); expect(result).toStrictEqual({ name: 'Podman Machine', status: 'running', canRedirect: expect.any(Boolean), }); }); }); describe('getConnectionByEngineId', () => { test('no provider should raise an error', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([]); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found'); expect(containerEngine.listInfos).not.toHaveBeenCalled(); }); test('empty listInfos response should raise an error', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([ { connection: { type: 'podman', status: (): ProviderConnectionStatus => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }, ]); 
vi.mocked(containerEngine.listInfos).mockResolvedValue([]); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found'); expect(containerEngine.listInfos).toHaveBeenCalled(); }); test('invalid engineId should raise an error', async () => { vi.mocked(provider.getContainerConnections).mockReturnValue([ { connection: { type: 'podman', status: (): ProviderConnectionStatus => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }, providerId: 'podman', }, ]); vi.mocked(containerEngine.listInfos).mockResolvedValue([ { engineId: 'engineId', engineName: 'enginerName', engineType: 'podman', cpus: 12, memory: 20, memoryUsed: 0, }, ]); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found'); expect(containerEngine.listInfos).toHaveBeenCalled(); }); test('valid engineId should return matching connection', async () => { const connectionMock: ContainerProviderConnection = { type: 'podman', status: () => 'started', name: 'Podman Machine', endpoint: { socketPath: './socket-path', }, }; vi.mocked(provider.getContainerConnections).mockReturnValue([ { connection: connectionMock, providerId: 'podman', }, ]); vi.mocked(containerEngine.listInfos).mockResolvedValue([ { engineId: 'engineId', engineName: 'enginerName', engineType: 'podman', cpus: 12, memory: 20, memoryUsed: 0, }, ]); const manager = new PodmanConnection(rpcExtensionMock); manager.init(); const connection = await manager.getConnectionByEngineId('engineId'); expect(containerEngine.listInfos).toHaveBeenCalled(); expect(connection).toBe(connectionMock); }); }); ================================================ FILE: packages/backend/src/managers/podmanConnection.ts ================================================ 
/********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerProviderConnection, Disposable, Event, RegisterContainerConnectionEvent, UpdateContainerConnectionEvent, RunResult, RunOptions, ProviderContainerConnection, } from '@podman-desktop/api'; import { containerEngine, env, navigation, EventEmitter, process, provider, extensions } from '@podman-desktop/api'; import { getPodmanMachineName, type MachineJSON, MIN_CPUS_VALUE, getPodmanCli } from '../utils/podman'; import { VMType } from '@shared/models/IPodman'; import { Publisher } from '../utils/Publisher'; import type { CheckContainerConnectionResourcesOptions, ContainerConnectionInfo, ContainerProviderConnectionInfo, } from '@shared/models/IContainerConnectionInfo'; import { MSG_PODMAN_CONNECTION_UPDATE } from '@shared/Messages'; import type { RpcExtension } from '@shared/messages/MessageProxy'; export interface PodmanConnectionEvent { status: 'stopped' | 'started' | 'unregister' | 'register'; } export interface PodmanRunOptions extends RunOptions { connection?: ProviderContainerConnection; } export class PodmanConnection extends Publisher implements Disposable { // Map of providerId with corresponding connections #providers: Map; #disposables: Disposable[]; private readonly 
_onPodmanConnectionEvent = new EventEmitter(); readonly onPodmanConnectionEvent: Event = this._onPodmanConnectionEvent.event; constructor(rpcExtension: RpcExtension) { super(rpcExtension, MSG_PODMAN_CONNECTION_UPDATE, () => this.getContainerProviderConnectionInfo()); this.#providers = new Map(); this.#disposables = []; } /** * Execute the podman cli with the arguments provided * * @example * ``` * const result = await podman.execute(connection, ['machine', 'ls', '--format=json']); * ``` * @param connection * @param args * @param options */ execute(connection: ContainerProviderConnection, args: string[], options?: RunOptions): Promise { const podman = extensions.getExtension('podman-desktop.podman'); if (!podman) { console.warn('cannot find podman extension api'); return this.executeLegacy(args, options); } const podmanApi: { exec(args: string[], options?: PodmanRunOptions): Promise; } = podman.exports; return podmanApi.exec(args, { ...options, connection: this.getProviderContainerConnection(connection), }); } /** * Execute a command inside the podman machine * * @example * ``` * const result = await podman.executeSSH(connection, ['ls', '/dev']); * ``` * @param connection * @param args * @param options */ executeSSH(connection: ContainerProviderConnection, args: string[], options?: RunOptions): Promise { return this.execute(connection, ['machine', 'ssh', this.getNameLegacyCompatibility(connection), ...args], options); } /** * Before 1.13, the podman extension was not exposing any api. * * Therefore, to support old version we need to get the podman executable ourself * @deprecated */ protected executeLegacy(args: string[], options?: RunOptions): Promise { return process.exec(getPodmanCli(), [...args], options); } /** * Before 1.13, the {@link ContainerProviderConnection.name} field was used as friendly user * field also. * * Therefore, we could have `Podman Machine Default` as name, where the real machine was `podman-machine-default`. 
* @param connection
   * @deprecated
   */
  protected getNameLegacyCompatibility(connection: ContainerProviderConnection): string {
    return getPodmanMachineName(connection);
  }

  // All known podman container connections, across every provider, as a flat list.
  getContainerProviderConnections(): ContainerProviderConnection[] {
    return Array.from(this.#providers.values()).flat();
  }

  /**
   * This method flatten the (providerId -> connections) map into a serializable
   * list of ContainerProviderConnectionInfo objects, one entry per connection.
   */
  getContainerProviderConnectionInfo(): ContainerProviderConnectionInfo[] {
    const output: ContainerProviderConnectionInfo[] = [];
    for (const [providerId, connections] of Array.from(this.#providers.entries())) {
      output.push(
        ...connections.map(
          (connection): ContainerProviderConnectionInfo => ({
            providerId: providerId,
            name: connection.name,
            vmType: this.parseVMType(connection.vmType),
            type: 'podman',
            // status is evaluated at call time: this is a snapshot, not a live value
            status: connection.status(),
          }),
        ),
      );
    }
    return output;
  }

  // Wire up provider listeners and populate the initial provider map.
  init(): void {
    // setup listeners
    this.listen();
    this.refreshProviders();
  }

  // Dispose all event-listener subscriptions registered by listen().
  dispose(): void {
    this.#disposables.forEach(disposable => disposable.dispose());
  }

  /**
   * This method allow us to get the ProviderContainerConnection given a ContainerProviderConnection
   * @param connection
   * @protected
   */
  protected getProviderContainerConnection(connection: ContainerProviderConnection): ProviderContainerConnection {
    const providers: ProviderContainerConnection[] = provider.getContainerConnections();
    // match by connection name among podman-type connections only
    const podmanProvider = providers
      .filter(({ connection }) => connection.type === 'podman')
      .find(provider => provider.connection.name === connection.name);
    if (!podmanProvider) throw new Error(`cannot find podman provider with connection name ${connection.name}`);
    return podmanProvider;
  }

  // Rebuild the providerId -> connections map from scratch and notify subscribers.
  protected refreshProviders(): void {
    // clear all providers
    this.#providers.clear();
    const providers: ProviderContainerConnection[] = provider.getContainerConnections();
    // register the podman container connection
    providers
      .filter(({ connection }) => connection.type === 'podman')
      .forEach(({ providerId, connection }) => {
        this.#providers.set(providerId, [connection, ...(this.#providers.get(providerId) ?? [])]);
      });
    // notify
    this.notify();
  }

  // Subscribe to provider lifecycle events; all subscriptions go into #disposables.
  private listen(): void {
    // capture unregister event
    this.#disposables.push(
      provider.onDidUnregisterContainerConnection(() => {
        this.refreshProviders();
        this._onPodmanConnectionEvent.fire({
          status: 'unregister',
        });
      }),
    );
    this.#disposables.push(
      provider.onDidRegisterContainerConnection(({ providerId, connection }: RegisterContainerConnectionEvent) => {
        // only podman connections are tracked
        if (connection.type !== 'podman') {
          return;
        }
        // update connection
        this.#providers.set(providerId, [connection, ...(this.#providers.get(providerId) ?? [])]);
        this.notify();
        this._onPodmanConnectionEvent.fire({
          status: 'register',
        });
      }),
    );
    this.#disposables.push(
      provider.onDidUpdateContainerConnection(({ status }: UpdateContainerConnectionEvent) => {
        switch (status) {
          // only started/stopped transitions are propagated; other statuses are ignored
          case 'started':
          case 'stopped':
            this._onPodmanConnectionEvent.fire({
              status: status,
            });
            this.notify();
            break;
          default:
            break;
        }
      }),
    );
    this.#disposables.push(
      provider.onDidUpdateProvider(() => {
        this.refreshProviders();
      }),
    );
  }

  // Map a raw vmType string to the VMType enum; anything unrecognized is UNKNOWN.
  protected parseVMType(vmtype: string | undefined): VMType {
    if (!vmtype) return VMType.UNKNOWN;
    const type = Object.values(VMType).find(s => s === vmtype);
    if (type === undefined) {
      return VMType.UNKNOWN;
    }
    return type;
  }

  /**
   * Get the VMType of the podman machine
   * Shells out to `podman machine list --format json` and parses the result.
   * @param name the machine name, from {@link ContainerProviderConnection}
   * @deprecated should use the `getContainerProviderConnectionInfo()`
   */
  async getVMType(name?: string): Promise {
    const { stdout } = await process.exec(getPodmanCli(), ['machine', 'list', '--format', 'json']);
    const parsed: unknown = JSON.parse(stdout);
    if (!Array.isArray(parsed)) throw new Error('podman machine list provided a malformed response');
    if (parsed.length === 0 && name) throw new Error('podman machine list provided an empty array');
    // On Linux we might not have any machine
    if (parsed.length === 0) return VMType.UNKNOWN;
    if (parsed.length > 1 && !name)
      throw new Error('name need to be provided when more than one podman machine is configured.');
    let output: MachineJSON;
    if (name) {
      // NOTE(review): parsed is unknown[]; the find result is assigned to MachineJSON —
      // presumably an implicit cast was present before extraction, confirm against upstream
      output = parsed.find(machine => typeof machine === 'object' && 'Name' in machine && machine.Name === name);
      if (!output) throw new Error(`cannot find matching podman machine with name ${name}`);
    } else {
      output = parsed[0];
    }
    return this.parseVMType(output.VMType);
  }

  // Resolve a serializable connection-info back to the live ContainerProviderConnection.
  getContainerProviderConnection(connection: ContainerProviderConnectionInfo): ContainerProviderConnection {
    const output = (this.#providers.get(connection.providerId) ?? []).find(
      mConnection => connection.name === mConnection.name,
    );
    if (!output) throw new Error(`no container provider connection found for connection name ${connection.name}`);
    return output;
  }

  // First connection whose status() is 'started', across all providers; undefined if none.
  findRunningContainerProviderConnection(): ContainerProviderConnection | undefined {
    for (const connections of Array.from(this.#providers.values())) {
      const result = connections.find(connection => connection.status() === 'started');
      if (result) return result;
    }
    return undefined;
  }

  /**
   * This method return the ContainerProviderConnection corresponding to an engineId
   * @param engineId
   */
  async getConnectionByEngineId(engineId: string): Promise {
    const connections = Array.from(this.#providers.values()).flat();
    for (const connection of connections) {
      // only the first engine info of each connection is inspected
      const infos = await containerEngine.listInfos({ provider: connection });
      if (infos.length === 0) continue;
      if (infos[0].engineId === engineId) return connection;
    }
    throw new Error('connection not found');
  }

  /**
   * Check whether the targeted (or any running) podman connection has enough
   * CPU and free memory for the given model; returns a status object the UI renders.
   */
  async checkContainerConnectionStatusAndResources(
    options: CheckContainerConnectionResourcesOptions,
  ): Promise {
    // starting from podman desktop 1.10 we have the navigate functions
    const hasNavigateFunction = !!navigation.navigateToResources;
    // if we do not precise the connection and are on linux we assume native usage
    if (env.isLinux && !options.connection) {
      return {
        status: 'native',
        canRedirect: hasNavigateFunction,
      };
    }
    let connection: ContainerProviderConnection | undefined = undefined;
    if (options.connection) {
      connection = this.getContainerProviderConnection(options.connection);
    } else {
      connection = this.findRunningContainerProviderConnection();
    }
    if (!connection) {
      return {
        status: 'no-machine',
        canRedirect: hasNavigateFunction,
      };
    }
    const engineInfos = await containerEngine.listInfos({
      provider: connection,
    });
    if (engineInfos.length === 0) {
      return {
        status: 'no-machine',
        canRedirect: hasNavigateFunction,
      };
    }
    const engineInfo = engineInfos[0];
    if (!engineInfo) {
      return {
        status: 'no-machine',
        canRedirect: hasNavigateFunction,
      };
    }
    const hasCpus = engineInfo.cpus !== undefined && engineInfo.cpus >= MIN_CPUS_VALUE;
    // recipes need more headroom (25%) than plain inference (10%)
    const multiplier = options.context === 'recipe' ? 1.25 : 1.1;
    const memoryExpected = options.model.memory * multiplier;
    // if the engine does not report memory figures we optimistically assume enough memory
    let hasMemory: boolean = true;
    if (engineInfo.memory !== undefined && engineInfo.memoryUsed !== undefined) {
      hasMemory = engineInfo.memory - engineInfo.memoryUsed >= memoryExpected;
    }
    let memoryIdle: number = 0;
    if (engineInfo.memory !== undefined && engineInfo.memoryUsed !== undefined) {
      memoryIdle = engineInfo.memory - engineInfo.memoryUsed;
    }
    if (!hasCpus || !hasMemory) {
      return {
        name: connection.name,
        cpus: engineInfo.cpus ?? 0,
        memoryIdle: memoryIdle,
        cpusExpected: MIN_CPUS_VALUE,
        memoryExpected: memoryExpected,
        status: 'low-resources',
        canEdit: !!connection.lifecycle?.edit,
        canRedirect: hasNavigateFunction,
      };
    }
    return {
      name: connection.name,
      status: 'running',
      canRedirect: hasNavigateFunction,
    };
  }
}

================================================
FILE: packages/backend/src/managers/recipes/BuilderManager.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { Recipe } from '@shared/models/IRecipe';
import type { ContainerConfig } from '../../models/AIConfig';
import fs from 'node:fs';
import { BuilderManager } from './BuilderManager';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { ContainerProviderConnection, ImageInfo } from '@podman-desktop/api';
import { containerEngine } from '@podman-desktop/api';
import { VMType } from '@shared/models/IPodman';

// stubbed TaskRegistry: every method is a vi.fn so tests can assert task updates
const taskRegistry = {
  getTask: vi.fn(),
  createTask: vi.fn(),
  updateTask: vi.fn(),
  delete: vi.fn(),
  deleteAll: vi.fn(),
  getTasks: vi.fn(),
  getTasksByLabels: vi.fn(),
  deleteByLabels: vi.fn(),
} as unknown as TaskRegistry;

// mock the podman-desktop API surface used by BuilderManager
vi.mock('@podman-desktop/api', () => ({
  containerEngine: {
    buildImage: vi.fn(),
    listImages: vi.fn(),
  },
}));

const connectionMock: ContainerProviderConnection = {
  name: 'Podman Machine',
  vmType: VMType.UNKNOWN,
} as unknown as ContainerProviderConnection;

beforeEach(() => {
  vi.resetAllMocks();
  // createTask echoes its arguments back as a Task-shaped object
  vi.mocked(taskRegistry.createTask).mockImplementation((name, state, labels) => ({
    id: 'random',
    name: name,
    state: state,
    labels: labels ?? {},
    error: undefined,
  }));
});

describe('buildImages', () => {
  const recipe = {
    id: 'recipe1',
  } as Recipe;
  const containers: ContainerConfig[] = [
    {
      name: 'container1',
      contextdir: 'contextdir1',
      containerfile: 'Containerfile',
      arch: ['amd64'],
      modelService: false,
      gpu_env: [],
      ports: [8080],
    },
  ];
  const manager = new BuilderManager(taskRegistry);

  test('setTaskState should be called with error if context does not exist', async () => {
    vi.spyOn(fs, 'existsSync').mockReturnValue(false);
    vi.mocked(containerEngine.listImages).mockRejectedValue([]);
    await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
      'Context configured does not exist.',
    );
  });

  test('setTaskState should be called with error if buildImage execution fails', async () => {
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    vi.mocked(containerEngine.buildImage).mockRejectedValue('error');
    vi.mocked(containerEngine.listImages).mockRejectedValue([]);
    await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
      'Something went wrong while building the image: error',
    );
    // the failing task must carry the build error message
    expect(taskRegistry.updateTask).toBeCalledWith({
      error: 'Something went wrong while building the image: error',
      name: 'Building container1',
      id: expect.any(String),
      state: expect.any(String),
      labels: {},
    });
  });

  test('setTaskState should be called with error if unable to find the image after built', async () => {
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    vi.mocked(containerEngine.buildImage).mockResolvedValue({});
    // build succeeds but the built image is absent from listImages
    vi.mocked(containerEngine.listImages).mockResolvedValue([]);
    await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
      'no image found for container1:latest',
    );
    expect(taskRegistry.updateTask).toBeCalledWith({
      error: 'no image found for container1:latest',
      name: 'Building container1',
      id: expect.any(String),
      state: expect.any(String),
      labels: {},
    });
  });

  test('succeed if building image do not fail', async () => {
    vi.spyOn(fs, 'existsSync').mockReturnValue(true);
    vi.mocked(containerEngine.buildImage).mockResolvedValue({});
    vi.mocked(containerEngine.listImages).mockResolvedValue([
      {
        RepoTags: ['recipe1-container1:latest'],
        engineId: 'engine',
        Id: 'id1',
      } as unknown as ImageInfo,
    ]);
    const imageInfoList = await manager.build(connectionMock, recipe, containers, 'config');
    expect(taskRegistry.updateTask).toBeCalledWith({
      name: 'Building container1',
      id: expect.any(String),
      state: 'success',
      labels: {},
    });
    expect(imageInfoList.length).toBe(1);
    expect(imageInfoList[0].ports.length).toBe(1);
    // ports are normalized to strings by build()
    expect(imageInfoList[0].ports[0]).equals('8080');
    expect(containerEngine.buildImage).toHaveBeenCalledWith(
      'contextdir1',
      expect.any(Function),
      expect.objectContaining({
        provider: connectionMock,
      }),
    );
  });
});

================================================
FILE: packages/backend/src/managers/recipes/BuilderManager.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import {
  type BuildImageOptions,
  type Disposable,
  containerEngine,
  type ContainerProviderConnection,
} from '@podman-desktop/api';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { RecipeImage, Recipe } from '@shared/models/IRecipe';
import type { ContainerConfig } from '../../models/AIConfig';
import type { Task } from '@shared/models/ITask';
import path from 'node:path';
import { getParentDirectory } from '../../utils/pathUtils';
import fs from 'node:fs';
import { getImageTag } from '../../utils/imagesUtils';
import {
  IMAGE_LABEL_APP_PORTS,
  IMAGE_LABEL_APPLICATION_NAME,
  IMAGE_LABEL_MODEL_SERVICE,
  IMAGE_LABEL_RECIPE_ID,
} from '../../utils/RecipeConstants';

// Builds the container images declared by a recipe, tracking progress through TaskRegistry.
export class BuilderManager implements Disposable {
  // recipe.id -> AbortController of the in-flight build
  // NOTE(review): generic parameters of Map were lost in extraction — confirm against upstream
  private controller: Map = new Map();

  constructor(private taskRegistry: TaskRegistry) {}

  /**
   * On dispose, the builder will abort all current build.
   */
  dispose(): void {
    // eslint-disable-next-line sonarjs/array-callback-without-return
    Array.from(this.controller.values()).every(controller => controller.abort('disposing builder manager'));
  }

  /**
   * Build every container image of the recipe in parallel.
   * Creates one 'Building <name>' task per container; on any failure the shared
   * AbortController cancels the sibling builds and the error is rethrown.
   * @param connection the podman connection to build on
   * @param recipe the recipe owning the containers
   * @param containers the container configs to build
   * @param configPath path of the recipe config file; its parent dir is the build root
   * @param labels extra task/image labels
   */
  async build(
    connection: ContainerProviderConnection,
    recipe: Recipe,
    containers: ContainerConfig[],
    configPath: string,
    labels: { [key: string]: string } = {},
  ): Promise {
    // one tracking task per container, keyed by container name
    const containerTasks: { [key: string]: Task } = Object.fromEntries(
      containers.map(container => [
        container.name,
        this.taskRegistry.createTask(`Building ${container.name}`, 'loading', labels),
      ]),
    );
    const imageInfoList: RecipeImage[] = [];
    // Promise all the build images
    const abortController = new AbortController();
    // only one build per recipe is supported
    if (this.controller.has(recipe.id)) {
      this.controller.get(recipe.id)?.abort('multiple build not supported.');
    }
    this.controller.set(recipe.id, abortController);
    try {
      await Promise.all(
        containers.map(container => {
          const task = containerTasks[container.name];
          // We use the parent directory of our configFile as the rootdir, then we append the contextDir provided
          const context = path.join(getParentDirectory(configPath), container.contextdir);
          console.log(`Application Manager using context ${context} for container ${container.name}`);
          // Ensure the context provided exist otherwise throw an Error
          if (!fs.existsSync(context)) {
            task.error = 'The context provided does not exist.';
            this.taskRegistry.updateTask(task);
            throw new Error('Context configured does not exist.');
          }
          const imageTag = getImageTag(recipe, container);
          const buildOptions: BuildImageOptions = {
            provider: connection,
            containerFile: container.containerfile,
            tag: imageTag,
            labels: {
              ...labels,
              [IMAGE_LABEL_RECIPE_ID]: recipe.id,
              [IMAGE_LABEL_MODEL_SERVICE]: container.modelService ? 'true' : 'false',
              [IMAGE_LABEL_APPLICATION_NAME]: container.name,
              [IMAGE_LABEL_APP_PORTS]: (container.ports ?? []).join(','),
            },
            abortController: abortController,
          };
          // flag flipped by the progress callback; checked after buildImage resolves
          let error = false;
          return containerEngine
            .buildImage(
              context,
              (event, data) => {
                // todo: do something with the event
                if (event === 'error' || (event === 'finish' && data !== '')) {
                  console.error('Something went wrong while building the image: ', data);
                  task.error = `Something went wrong while building the image: ${data}`;
                  this.taskRegistry.updateTask(task);
                  error = true;
                }
              },
              buildOptions,
            )
            .catch((err: unknown) => {
              task.error = `Something went wrong while building the image: ${String(err)}`;
              this.taskRegistry.updateTask(task);
              throw new Error(`Something went wrong while building the image: ${String(err)}`);
            })
            .then(() => {
              // buildImage can resolve even when the stream reported an error
              if (error) {
                throw new Error(`Something went wrong while building the image: ${imageTag}`);
              }
            });
        }),
      );
    } catch (err: unknown) {
      // cancel the remaining sibling builds before propagating
      abortController.abort();
      throw err;
    } finally {
      // remove abort controller
      this.controller.delete(recipe.id);
    }
    // after image are built we return their data
    const images = await containerEngine.listImages({ provider: connection });
    await Promise.all(
      containers.map(async container => {
        const task = containerTasks[container.name];
        const imageTag = getImageTag(recipe, container);
        const image = images.find(im => {
          return im.RepoTags?.some(tag => tag.endsWith(imageTag));
        });
        if (!image) {
          task.error = `no image found for ${container.name}:latest`;
          this.taskRegistry.updateTask(task);
          throw new Error(`no image found for ${container.name}:latest`);
        }
        let imageName: string | undefined = undefined;
        if (image.RepoTags && image.RepoTags.length > 0) {
          imageName = image.RepoTags[0];
        }
        imageInfoList.push({
          id: image.Id,
          engineId: image.engineId,
          name: imageName,
          modelService: container.modelService,
          // ports are exposed as strings downstream
          ports: container.ports?.map(p => `${p}`) ?? [],
          appName: container.name,
          recipeId: recipe.id,
        });
        task.state = 'success';
        this.taskRegistry.updateTask(task);
      }),
    );
    return imageInfoList;
  }
}

================================================
FILE: packages/backend/src/managers/recipes/PodManager.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, describe, vi, expect, test } from 'vitest';
import { PodManager } from './PodManager';
import type { ContainerInspectInfo, ContainerJSONEvent, PodCreateOptions, PodInfo } from '@podman-desktop/api';
import { EventEmitter, containerEngine } from '@podman-desktop/api';

// mock the podman-desktop API surface used by PodManager
vi.mock('@podman-desktop/api', () => ({
  containerEngine: {
    listPods: vi.fn(),
    stopPod: vi.fn(),
    removePod: vi.fn(),
    startPod: vi.fn(),
    createPod: vi.fn(),
    inspectContainer: vi.fn(),
    onEvent: vi.fn(),
  },
  EventEmitter: vi.fn(),
}));

beforeEach(() => {
  vi.resetAllMocks();
  // we return the id as health status
  vi.mocked(containerEngine.inspectContainer).mockImplementation(async (engineId: string, id: string) => {
    return {
      State: {
        Health: {
          Status: id,
        },
      },
    } as unknown as ContainerInspectInfo;
  });
  // mocking the EventEmitter mechanism
  const listeners: ((value: unknown) => void)[] = [];
  vi.mocked(EventEmitter).mockReturnValue({
    event:
vi.fn().mockImplementation(callback => {
      listeners.push(callback);
    }),
    fire: vi.fn().mockImplementation((content: unknown) => {
      // deliver fired content synchronously to every registered listener
      listeners.forEach(listener => listener(content));
    }),
  } as unknown as EventEmitter);
});

test('getAllPods should use container engine list pods method', async () => {
  await new PodManager().getAllPods();
  expect(containerEngine.listPods).toHaveBeenCalledOnce();
});

test('findPodByLabelsValues should only return pods with labels matching values', async () => {
  vi.mocked(containerEngine.listPods).mockResolvedValue([
    {
      Id: 'pod-id-1',
      Labels: {
        'dummy-key': 'dummy-invalid',
        hello: 'eggs',
      },
    },
    {
      Id: 'pod-id-2',
      Labels: {
        hello: 'world',
        'dummy-key': 'dummy-valid',
      },
    },
    {
      Id: 'pod-id-2',
      Labels: {
        hello: 'world',
        'dummy-key': 'invalid',
      },
    },
    {
      Id: 'pod-id-3',
    },
  ] as unknown as PodInfo[]);
  const pod = await new PodManager().findPodByLabelsValues({
    'dummy-key': 'dummy-valid',
    hello: 'world',
  });
  expect(pod).toBeDefined();
  expect(pod?.Id).toBe('pod-id-2');
});

test('getPodsWithLabels should only return pods with proper labels', async () => {
  vi.mocked(containerEngine.listPods).mockResolvedValue([
    {
      Id: 'pod-id-1',
      Labels: {
        'dummy-key': 'dummy-value',
        hello: 'world',
      },
    },
    {
      Id: 'pod-id-2',
      Labels: {
        hello: 'world',
        'dummy-key': 'dummy-value',
      },
    },
    {
      Id: 'pod-id-3',
    },
  ] as unknown as PodInfo[]);
  const pods = await new PodManager().getPodsWithLabels(['dummy-key']);
  expect(pods.length).toBe(2);
  expect(pods.find(pod => pod.Id === 'pod-id-1')).toBeDefined();
  expect(pods.find(pod => pod.Id === 'pod-id-2')).toBeDefined();
  expect(pods.find(pod => pod.Id === 'pod-id-3')).toBeUndefined();
});

// the inspectContainer mock above maps container Id -> health status,
// so container Ids like 'healthy'/'unhealthy'/'starting' drive getHealth
describe('getHealth', () => {
  test('getHealth with no container should be none', async () => {
    const health = await new PodManager().getHealth({
      Containers: [],
    } as unknown as PodInfo);
    expect(health).toBe('none');
  });

  test('getHealth with one healthy should be healthy', async () => {
    const health = await new PodManager().getHealth({
      Containers: [
        {
          Id: 'healthy',
        },
      ],
    } as unknown as PodInfo);
    expect(health).toBe('healthy');
  });

  test('getHealth with many healthy and one unhealthy should be unhealthy', async () => {
    const health = await new PodManager().getHealth({
      Containers: [
        {
          Id: 'healthy',
        },
        {
          Id: 'unhealthy',
        },
        {
          Id: 'healthy',
        },
        {
          Id: 'starting',
        },
      ],
    } as unknown as PodInfo);
    expect(health).toBe('unhealthy');
  });

  test('getHealth with many healthy and one starting should be starting', async () => {
    const health = await new PodManager().getHealth({
      Containers: [
        {
          Id: 'healthy',
        },
        {
          Id: 'healthy',
        },
        {
          Id: 'starting',
        },
      ],
    } as unknown as PodInfo);
    expect(health).toBe('starting');
  });
});

describe('getPod', () => {
  test('getPod should throw an error if none is matching', async () => {
    vi.mocked(containerEngine.listPods).mockResolvedValue([]);
    await expect(async () => {
      await new PodManager().getPod('fakeEngineId', 'fakePodId');
    }).rejects.toThrowError('pod with engineId fakeEngineId and Id fakePodId cannot be found.');
  });

  test('getPod should return matching pod', async () => {
    vi.mocked(containerEngine.listPods).mockResolvedValue([
      {
        engineId: 'engine-1',
        Id: 'pod-id-1',
        Labels: {
          'dummy-key': 'dummy-value',
          hello: 'world',
        },
      },
      {
        engineId: 'engine-2',
        Id: 'pod-id-2',
        Labels: {
          hello: 'world',
          'dummy-key': 'dummy-value',
        },
      },
      {
        engineId: 'engine-3',
        Id: 'pod-id-3',
      },
    ] as unknown as PodInfo[]);
    const pod = await new PodManager().getPod('engine-3', 'pod-id-3');
    expect(pod).toBeDefined();
    expect(pod.engineId).toBe('engine-3');
    expect(pod.Id).toBe('pod-id-3');
  });
});

test('stopPod should call containerEngine.stopPod', async () => {
  await new PodManager().stopPod('dummy-engine-id', 'dummy-pod-id');
  expect(containerEngine.stopPod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});

test('removePod should call containerEngine.removePod', async () => {
  await new PodManager().removePod('dummy-engine-id', 'dummy-pod-id');
  expect(containerEngine.removePod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});

test('startPod should call containerEngine.startPod', async () => {
  await new PodManager().startPod('dummy-engine-id', 'dummy-pod-id');
  expect(containerEngine.startPod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});

test('createPod should call containerEngine.createPod', async () => {
  const options: PodCreateOptions = {
    name: 'dummy-name',
    portmappings: [],
  };
  await new PodManager().createPod(options);
  expect(containerEngine.createPod).toHaveBeenCalledWith(options);
});

test('dispose should dispose onEvent disposable', () => {
  const disposableMock = vi.fn();
  vi.mocked(containerEngine.onEvent).mockImplementation(() => {
    return { dispose: disposableMock };
  });
  const podManager = new PodManager();
  podManager.init();
  podManager.dispose();
  expect(containerEngine.onEvent).toHaveBeenCalled();
  expect(disposableMock).toHaveBeenCalled();
});

// helper: init a PodManager while capturing the listener passed to containerEngine.onEvent
const getInitializedPodManager = (): {
  onEventListener: (e: ContainerJSONEvent) => unknown;
  podManager: PodManager;
} => {
  let func: ((e: ContainerJSONEvent) => unknown) | undefined = undefined;
  vi.mocked(containerEngine.onEvent).mockImplementation(fn => {
    func = fn;
    return { dispose: vi.fn() };
  });
  const podManager = new PodManager();
  podManager.init();
  if (!func) throw new Error('listener should be defined');
  return { onEventListener: func, podManager };
};

describe('events', () => {
  test('onStartPodEvent listener should be called on start pod event', async () => {
    vi.mocked(containerEngine.listPods).mockResolvedValue([
      {
        Id: 'pod-id-1',
        Labels: {
          'dummy-key': 'dummy-value',
          hello: 'world',
        },
      },
    ] as unknown as PodInfo[]);
    const { onEventListener, podManager } = getInitializedPodManager();
    const startListenerMock = vi.fn();
    podManager.onStartPodEvent(startListenerMock);
    onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'start' });
    await vi.waitFor(() => {
      expect(startListenerMock).toHaveBeenCalledWith({
        Id: 'pod-id-1',
        Labels: {
          'dummy-key': 'dummy-value',
          hello: 'world',
        },
      });
    });
  });

  test('onStopPodEvent listener should be called on start pod event', async () => {
    vi.mocked(containerEngine.listPods).mockResolvedValue([
      {
        Id: 'pod-id-1',
        Labels: {
          'dummy-key': 'dummy-value',
          hello: 'world',
        },
      },
    ] as unknown as PodInfo[]);
    const { onEventListener, podManager } = getInitializedPodManager();
    const stopListenerMock = vi.fn();
    podManager.onStopPodEvent(stopListenerMock);
    onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'stop' });
    await vi.waitFor(() => {
      expect(stopListenerMock).toHaveBeenCalledWith({
        Id: 'pod-id-1',
        Labels: {
          'dummy-key': 'dummy-value',
          hello: 'world',
        },
      });
    });
  });

  test('onRemovePodEvent listener should be called on start pod event', async () => {
    const { onEventListener, podManager } = getInitializedPodManager();
    const removeListenerMock = vi.fn();
    podManager.onRemovePodEvent(removeListenerMock);
    onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'remove' });
    await vi.waitFor(() => {
      expect(removeListenerMock).toHaveBeenCalledWith({
        podId: 'pod-id-1',
      });
    });
    // remove events must not trigger a pod lookup
    expect(containerEngine.listPods).not.toHaveBeenCalled();
  });
});

================================================
FILE: packages/backend/src/managers/recipes/PodManager.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import type { Disposable, PodCreateOptions, PodInfo, Event } from '@podman-desktop/api';
import { containerEngine, EventEmitter } from '@podman-desktop/api';
import type { PodHealth } from '@shared/models/IApplicationState';
import { getPodHealth } from '../../utils/podsUtils';

// payload of the remove-pod event (remove events carry only the pod id)
export interface PodEvent {
  podId: string;
}

// Thin wrapper around containerEngine pod APIs, re-emitting start/stop/remove pod events.
// NOTE(review): EventEmitter/Event/Promise generic parameters were lost in extraction —
// confirm against upstream sources.
export class PodManager implements Disposable {
  #eventDisposable: Disposable | undefined;

  // start pod events
  private readonly _onStartPodEvent = new EventEmitter();
  readonly onStartPodEvent: Event = this._onStartPodEvent.event;
  // stop pod events
  private readonly _onStopPodEvent = new EventEmitter();
  readonly onStopPodEvent: Event = this._onStopPodEvent.event;
  // remove pod events
  private readonly _onRemovePodEvent = new EventEmitter();
  readonly onRemovePodEvent: Event = this._onRemovePodEvent.event;

  dispose(): void {
    this.#eventDisposable?.dispose();
  }

  // Subscribe to engine events and fan out pod start/stop/remove to our emitters.
  init(): void {
    this.#eventDisposable = containerEngine.onEvent(async event => {
      // filter on pod event type
      if (event.Type !== 'pod') {
        return;
      }
      // remove events are fired with the raw id: the pod no longer exists to be looked up
      if (event.status === 'remove') {
        return this._onRemovePodEvent.fire({
          podId: event.id,
        });
      }
      const pod: PodInfo = await this.getPodById(event.id);
      switch (event.status) {
        case 'start':
          this._onStartPodEvent.fire(pod);
          break;
        case 'stop':
          this._onStopPodEvent.fire(pod);
          break;
      }
    });
  }

  /**
   * Utility method to get all the pods
   */
  getAllPods(): Promise {
    return containerEngine.listPods();
  }

  /**
   * return the first pod matching the provided labels and their associated value
   * @param requestedLabels the labels the pod must be matching
   */
  async findPodByLabelsValues(requestedLabels: Record): Promise {
    const pods = await this.getAllPods();
    return pods.find(pod => {
      const labels = pod.Labels;
      // eslint-disable-next-line sonarjs/different-types-comparison
      if (labels === undefined) return false;
      // every requested key must exist with exactly the requested value
      for (const [key, value] of Object.entries(requestedLabels)) {
        if (!(key in labels) || labels[key] !== value) return false;
      }
      return true;
    });
  }

  /**
   * return pods containing all the labels provided
   * This method does not check for the values, only existence
   * @param labels
   */
  async getPodsWithLabels(labels: string[]): Promise {
    const pods = await this.getAllPods();
    return pods.filter(pod => labels.every(label => !!pod.Labels && label in pod.Labels));
  }

  /**
   * Given a pod Info, will fetch the health status of each container composing it, and
   * will return a PodHealth
   * @param pod the pod to inspect
   */
  async getHealth(pod: PodInfo): Promise {
    // inspect every container in parallel; only the Health.Status string matters
    const containerStates: (string | undefined)[] = await Promise.all(
      pod.Containers.map(container =>
        containerEngine.inspectContainer(pod.engineId, container.Id).then(data => data.State.Health?.Status),
      ),
    );
    return getPodHealth(containerStates);
  }

  /**
   * This handy method is private as we do not want expose method not providing
   * the engineId, but this is required because PodEvent do not provide the engineId
   * @param id
   * @private
   */
  private async getPodById(id: string): Promise {
    const pods = await this.getAllPods();
    const result = pods.find(pod => pod.Id === id);
    if (!result) throw new Error(`pod with Id ${id} cannot be found.`);
    return result;
  }

  // Lookup by the (engineId, Id) pair; throws when no pod matches.
  async getPod(engineId: string, Id: string): Promise {
    const pods = await this.getAllPods();
    const result = pods.find(pod => pod.engineId === engineId && pod.Id === Id);
    if (!result) throw new Error(`pod with engineId ${engineId} and Id ${Id} cannot be found.`);
    return result;
  }

  async stopPod(engineId: string, id: string): Promise {
    return containerEngine.stopPod(engineId, id);
  }

  async removePod(engineId: string, id: string): Promise {
    return containerEngine.removePod(engineId, id);
  }

  async startPod(engineId: string, id: string): Promise {
    return containerEngine.startPod(engineId, id);
  }

  async createPod(podOptions: PodCreateOptions): Promise<{ engineId: string; Id: string }> {
    return containerEngine.createPod(podOptions);
  }
}
================================================
FILE: packages/backend/src/managers/recipes/RecipeManager.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { BuilderManager } from './BuilderManager';
import type { GitManager } from '../gitManager';
import type { LocalRepositoryRegistry } from '../../registries/LocalRepositoryRegistry';
import { RecipeManager } from './RecipeManager';
import { containerEngine, type ContainerProviderConnection } from '@podman-desktop/api';
import type { Recipe } from '@shared/models/IRecipe';
import type { Stats } from 'node:fs';
import { existsSync, statSync } from 'node:fs';
import { AIConfigFormat, parseYamlFile } from '../../models/AIConfig';
import { goarch } from '../../utils/arch';
import { VMType } from '@shared/models/IPodman';
import type { InferenceManager } from '../inference/inferenceManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { ApplicationOptions } from '../../models/ApplicationOptions';

// Collaborator mocks: only the methods RecipeManager actually calls are stubbed.
const taskRegistryMock = {
  createTask: vi.fn(),
  updateTask: vi.fn(),
} as unknown as TaskRegistry;

const builderManagerMock = {
  build: vi.fn(),
} as unknown as BuilderManager;

const gitManagerMock = {
  processCheckout: vi.fn(),
} as unknown as GitManager;

const localRepositoriesMock = {
  register: vi.fn(),
} as unknown as LocalRepositoryRegistry;

const inferenceManagerMock = {} as unknown as InferenceManager;

const recipeMock: Recipe = {
  id: 'recipe-test',
  name: 'Test Recipe',
  categories: [],
  description: 'test recipe description',
  repository: 'http://test-repository.test',
  readme: 'test recipe readme',
};

const connectionMock: ContainerProviderConnection = {
  name: 'Podman Machine',
  vmType: VMType.UNKNOWN,
} as unknown as ContainerProviderConnection;

const modelInfoMock: ModelInfo = {
  id: 'modelId',
  name: 'Model',
  description: 'model to test',
} as unknown as ModelInfo;

vi.mock('../../models/AIConfig', () => ({
  AIConfigFormat: {
    CURRENT: 'current',
  },
  parseYamlFile: vi.fn(),
}));

vi.mock('node:fs', () => ({
  existsSync: vi.fn(),
  statSync: vi.fn(),
}));

vi.mock('@podman-desktop/api', () => ({
  containerEngine: {
    listImages: vi.fn(),
  },
}));

vi.mock('../../utils/arch', () => ({
  goarch: vi.fn(),
}));

beforeEach(() => {
  vi.resetAllMocks();
  vi.mocked(containerEngine.listImages).mockResolvedValue([]);
  // createTask echoes its arguments back so tests can assert on the produced task
  vi.mocked(taskRegistryMock.createTask).mockImplementation((name, state, labels) => ({
    name,
    state,
    labels,
    id: 'fake-task',
  }));
  vi.mocked(existsSync).mockReturnValue(true);
  vi.mocked(statSync).mockReturnValue({
    isDirectory: () => true,
  } as unknown as Stats);
  // default parsed recipe config: a single buildable container matching the mocked arch
  vi.mocked(parseYamlFile).mockReturnValue({
    version: AIConfigFormat.CURRENT,
    application: {
      containers: [
        {
          arch: ['dummy-arch'],
          modelService: false,
          name: 'test-container',
          gpu_env: [],
          contextdir: '.',
        },
      ],
    },
  });
  vi.mocked(goarch).mockReturnValue('dummy-arch');
});

/** Build a RecipeManager wired with all mocks and already initialized. */
async function getInitializedRecipeManager(): Promise<RecipeManager> {
  const manager = new RecipeManager(
    'test-app-user-directory',
    gitManagerMock,
    taskRegistryMock,
    builderManagerMock,
    localRepositoriesMock,
    inferenceManagerMock,
  );
  manager.init();
  return manager;
}

describe('cloneRecipe', () => {
  test('error in checkout should set the task to error and propagate it', async () => {
    vi.mocked(gitManagerMock.processCheckout).mockRejectedValue(new Error('clone error'));
    const manager = await getInitializedRecipeManager();
    await expect(() => {
      return manager.cloneRecipe(recipeMock);
    }).rejects.toThrowError('clone error');
    expect(taskRegistryMock.updateTask).toHaveBeenCalledWith(
      expect.objectContaining({
        state: 'error',
      }),
    );
  });

  test('labels should be propagated', async () => {
    const manager = await getInitializedRecipeManager();
    await manager.cloneRecipe(recipeMock, {
      'test-label': 'test-value',
    });
    expect(gitManagerMock.processCheckout).toHaveBeenCalledWith({
      repository: recipeMock.repository,
      ref: recipeMock.ref,
      targetDirectory: expect.any(String),
    });
    expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Checking out repository', 'loading', {
      'test-label': 'test-value',
      'recipe-id': recipeMock.id,
      git: 'checkout',
    });
    expect(localRepositoriesMock.register).toHaveBeenCalledWith({
      path: expect.any(String),
      sourcePath: expect.any(String),
      labels: {
        'recipe-id': recipeMock.id,
      },
    });
  });
});

// Run the buildRecipe suite twice: once with a model in the options, once without.
describe.each([true, false])('buildRecipe, with model is %o', withModel => {
  let applicationOptions: ApplicationOptions;
  beforeEach(() => {
    applicationOptions = withModel
      ? {
          connection: connectionMock,
          recipe: recipeMock,
          model: modelInfoMock,
        }
      : {
          connection: connectionMock,
          recipe: recipeMock,
        };
  });

  test('error in build propagate it', async () => {
    vi.mocked(builderManagerMock.build).mockRejectedValue(new Error('build error'));
    const manager = await getInitializedRecipeManager();
    await expect(() => {
      return manager.buildRecipe(applicationOptions);
    }).rejects.toThrowError('build error');
  });

  test('labels should be propagated', async () => {
    const manager = await getInitializedRecipeManager();
    await manager.buildRecipe(applicationOptions, {
      'test-label': 'test-value',
    });
    expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Loading configuration', 'loading', {
      'test-label': 'test-value',
      'recipe-id': recipeMock.id,
    });
    expect(builderManagerMock.build).toHaveBeenCalledWith(
      connectionMock,
      recipeMock,
      [
        {
          arch: ['dummy-arch'],
          modelService: false,
          name: 'test-container',
          gpu_env: [],
          contextdir: '.',
        },
      ],
      expect.any(String),
      {
        'test-label': 'test-value',
        'recipe-id': recipeMock.id,
      },
    );
  });
});

================================================
FILE: packages/backend/src/managers/recipes/RecipeManager.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { GitCloneInfo, GitManager } from '../gitManager'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import type { Recipe, RecipeComponents } from '@shared/models/IRecipe'; import path from 'node:path'; import type { Task } from '@shared/models/ITask'; import type { LocalRepositoryRegistry } from '../../registries/LocalRepositoryRegistry'; import type { AIConfig, AIConfigFile, ContainerConfig } from '../../models/AIConfig'; import { parseYamlFile } from '../../models/AIConfig'; import { existsSync, statSync } from 'node:fs'; import { goarch } from '../../utils/arch'; import type { BuilderManager } from './BuilderManager'; import type { Disposable } from '@podman-desktop/api'; import { CONFIG_FILENAME } from '../../utils/RecipeConstants'; import type { InferenceManager } from '../inference/inferenceManager'; import { withDefaultConfiguration } from '../../utils/inferenceUtils'; import type { InferenceServer } from '@shared/models/IInference'; import { type ApplicationOptions, isApplicationOptionsWithModelInference } from '../../models/ApplicationOptions'; export interface AIContainers { aiConfigFile: AIConfigFile; containers: ContainerConfig[]; } export class RecipeManager implements Disposable { constructor( private appUserDirectory: string, private git: GitManager, private taskRegistry: TaskRegistry, private builderManager: BuilderManager, private localRepositories: LocalRepositoryRegistry, private inferenceManager: InferenceManager, ) {} dispose(): void {} init(): void {} private async doCheckout(gitCloneInfo: GitCloneInfo, labels?: { [id: string]: string }): Promise { // Creating checkout task const checkoutTask: Task = this.taskRegistry.createTask('Checking out repository', 'loading', { ...labels, git: 'checkout', }); try { await this.git.processCheckout(gitCloneInfo); checkoutTask.state = 'success'; } catch (err: 
unknown) { checkoutTask.state = 'error'; checkoutTask.error = String(err); // propagate error throw err; } finally { // Update task registry this.taskRegistry.updateTask(checkoutTask); } } public async cloneRecipe(recipe: Recipe, labels?: { [key: string]: string }): Promise { const localFolder = path.join(this.appUserDirectory, recipe.id); // clone the recipe repository on the local folder const gitCloneInfo: GitCloneInfo = { repository: recipe.repository, ref: recipe.ref, targetDirectory: localFolder, }; await this.doCheckout(gitCloneInfo, { ...labels, 'recipe-id': recipe.id, }); this.localRepositories.register({ path: gitCloneInfo.targetDirectory, sourcePath: path.join(gitCloneInfo.targetDirectory, recipe.basedir ?? ''), labels: { 'recipe-id': recipe.id, }, }); } public async buildRecipe(options: ApplicationOptions, labels?: { [key: string]: string }): Promise { const localFolder = path.join(this.appUserDirectory, options.recipe.id); let inferenceServer: InferenceServer | undefined; if (isApplicationOptionsWithModelInference(options)) { // if the recipe has a defined backend, we gives priority to using an inference server if (options.recipe.backend && options.recipe.backend === options.model.backend) { let task: Task | undefined; try { inferenceServer = this.inferenceManager.findServerByModel(options.model); task = this.taskRegistry.createTask('Starting Inference server', 'loading', labels); if (!inferenceServer) { const inferenceContainerId = await this.inferenceManager.createInferenceServer( await withDefaultConfiguration({ modelsInfo: [options.model], }), ); inferenceServer = this.inferenceManager.get(inferenceContainerId); this.taskRegistry.updateTask({ ...task, labels: { ...task.labels, containerId: inferenceContainerId, }, }); } else if (inferenceServer.status === 'stopped') { await this.inferenceManager.startInferenceServer(inferenceServer.container.containerId); } task.state = 'success'; } catch (e) { // we only skip the task update if the error is that 
we do not support this backend. // If so, we build the image for the model service if (task && String(e) !== 'no enabled provider could be found.') { task.state = 'error'; task.error = `Something went wrong while starting the inference server: ${String(e)}`; throw e; } } finally { if (task) { this.taskRegistry.updateTask(task); } } } } // load and parse the recipe configuration file and filter containers based on architecture const configAndFilteredContainers = this.getConfigAndFilterContainers( options.recipe.basedir, localFolder, !!inferenceServer, { ...labels, 'recipe-id': options.recipe.id, }, ); const images = await this.builderManager.build( options.connection, options.recipe, configAndFilteredContainers.containers, configAndFilteredContainers.aiConfigFile.path, { ...labels, 'recipe-id': options.recipe.id, }, ); return { images, inferenceServer, }; } private getConfigAndFilterContainers( recipeBaseDir: string | undefined, localFolder: string, useInferenceServer: boolean, labels?: { [key: string]: string }, ): AIContainers { // Adding loading configuration task const task = this.taskRegistry.createTask('Loading configuration', 'loading', labels); let aiConfigFile: AIConfigFile; try { // load and parse the recipe configuration file aiConfigFile = this.getConfiguration(recipeBaseDir, localFolder); } catch (e) { task.error = `Something went wrong while loading configuration: ${String(e)}.`; this.taskRegistry.updateTask(task); throw e; } // filter the containers based on architecture, gpu accelerator and backend (that define which model supports) let filteredContainers: ContainerConfig[] = this.filterContainers(aiConfigFile.aiConfig); // if we are using the inference server we can remove the model service if (useInferenceServer) { filteredContainers = filteredContainers.filter(c => !c.modelService); } if (filteredContainers.length > 0) { // Mark as success. task.state = 'success'; this.taskRegistry.updateTask(task); } else { // Mark as failure. 
task.error = 'No containers available.'; this.taskRegistry.updateTask(task); throw new Error('No containers available.'); } return { aiConfigFile: aiConfigFile, containers: filteredContainers, }; } private filterContainers(aiConfig: AIConfig): ContainerConfig[] { return aiConfig.application.containers.filter( container => container.gpu_env.length === 0 && container.arch.some(arc => arc === goarch()), ); } private getConfiguration(recipeBaseDir: string | undefined, localFolder: string): AIConfigFile { let configFile: string; if (recipeBaseDir !== undefined) { configFile = path.join(localFolder, recipeBaseDir, CONFIG_FILENAME); } else { configFile = path.join(localFolder, CONFIG_FILENAME); } if (!existsSync(configFile)) { throw new Error(`The file located at ${configFile} does not exist.`); } // If the user configured the config as a directory we check for "ai-lab.yaml" inside. if (statSync(configFile).isDirectory()) { const tmpPath = path.join(configFile, CONFIG_FILENAME); // If it has the ai-lab.yaml we use it. if (existsSync(tmpPath)) { configFile = tmpPath; } } // Parsing the configuration console.log(`Reading configuration from ${configFile}.`); let aiConfig: AIConfig; try { aiConfig = parseYamlFile(configFile, goarch()); } catch (err) { console.error('Cannot load configure file.', err); throw new Error(`Cannot load configuration file.`); } // Mark as success. return { aiConfig, path: configFile, }; } } ================================================ FILE: packages/backend/src/managers/snippets/java-okhttp-snippet.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { expect, test } from 'vitest';
import { javaOkHttpGenerator } from './java-okhttp-snippet';

// The generator renders the mustache template with the given chat-completions URL;
// assert the URL ends up in the OkHttp `.url(...)` call of the produced snippet.
test('expect return generated snippet', async () => {
  const payload = await javaOkHttpGenerator({ url: 'http://localhost:32412/v1/chat/completions' });
  expect(payload).toBeDefined();
  expect(payload).toContain('.url("http://localhost:32412/v1/chat/completions")');
});

================================================
FILE: packages/backend/src/managers/snippets/java-okhttp-snippet.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RequestOptions } from '@shared/models/RequestOptions'; import mustache from 'mustache'; import javaOkHttpTemplate from '../../templates/java-okhttp.mustache?raw'; export async function javaOkHttpGenerator(requestOptions: RequestOptions): Promise { if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator'); return mustache.render(javaOkHttpTemplate, { endpoint: requestOptions.url, }); } ================================================ FILE: packages/backend/src/managers/snippets/python-langchain-snippet.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { expect, test } from 'vitest';
import { pythonLangChainGenerator } from './python-langchain-snippet';

// The generator strips 'chat/completions' from the URL, so the snippet's
// model_service base URL must end at '/v1/'.
test('expect return generated snippet', async () => {
  const payload = await pythonLangChainGenerator({ url: 'http://localhost:32412/v1/chat/completions' });
  expect(payload).toBeDefined();
  expect(payload).toContain('model_service = "http://localhost:32412/v1/"');
});

================================================
FILE: packages/backend/src/managers/snippets/python-langchain-snippet.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RequestOptions } from '@shared/models/RequestOptions'; import mustache from 'mustache'; import pythonLangChainTemplate from '../../templates/python-langchain.mustache?raw'; export async function pythonLangChainGenerator(requestOptions: RequestOptions): Promise { if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator'); return mustache.render(pythonLangChainTemplate, { endpoint: requestOptions.url.replace('chat/completions', ''), }); } ================================================ FILE: packages/backend/src/managers/snippets/quarkus-snippet.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { quarkusLangchain4Jgenerator } from './quarkus-snippet';

beforeEach(() => {
  vi.resetAllMocks();
});

// The generator fetches maven-metadata.xml to resolve the latest release version;
// swap global.fetch for a stub returning canned metadata and restore it afterwards.
// NOTE(review): the stubbed payload below looks like maven-metadata XML whose markup
// tags were stripped by extraction — confirm against the upstream spec file.
test('expect fetched version in generated payload', async () => {
  const oldFetch = global.fetch;
  try {
    global.fetch = vi.fn().mockResolvedValue({
      text: () =>
        Promise.resolve(
          'io.quarkiverse.langchain4jquarkus-langchain4j-corelatest-versionrelease-version',
        ),
    });
    const payload = await quarkusLangchain4Jgenerator({ url: 'http://localhost:32412/v1/chat/completions' });
    expect(payload).toBeDefined();
    // the version extracted from the metadata must be rendered into the snippet
    expect(payload).toContain('release-version');
  } finally {
    // always restore the real fetch, even when the assertions fail
    global.fetch = oldFetch;
  }
});

================================================
FILE: packages/backend/src/managers/snippets/quarkus-snippet.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RequestOptions } from '@shared/models/RequestOptions'; import mustache from 'mustache'; import template from '../../templates/quarkus-langchain4j.mustache?raw'; import xmljs from 'xml-js'; const SUFFIX_LENGTH = '/chat/completions'.length; const METADATA_URL = 'https://repo1.maven.org/maven2/io/quarkiverse/langchain4j/quarkus-langchain4j-core/maven-metadata.xml'; let quarkusLangchain4jVersion: string; async function getQuarkusLangchain4jVersion(): Promise { if (quarkusLangchain4jVersion) { return quarkusLangchain4jVersion; } const response = await fetch(METADATA_URL, { redirect: 'follow' }); const content = JSON.parse(xmljs.xml2json(await response.text(), { compact: true })); // eslint-disable-next-line sonarjs/no-nested-assignment return (quarkusLangchain4jVersion = content.metadata.versioning.release._text); } export async function quarkusLangchain4Jgenerator(requestOptions: RequestOptions): Promise { if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator'); return mustache.render(template, { baseUrl: requestOptions.url.substring(0, requestOptions.url.length - SUFFIX_LENGTH), version: await getQuarkusLangchain4jVersion(), }); } ================================================ FILE: packages/backend/src/models/AIConfig.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { expect, test, describe, vi } from 'vitest';
import fs from 'node:fs';
import { type AIConfig, AIConfigFormat, parseYamlFile } from './AIConfig';

// Define mock file paths and contents
const mockYamlPath = '/path/to/mock.yml';
const defaultArch = 'x64';

// spy on readFileSync so each test can feed parseYamlFile an in-memory YAML document
const readFileSync = vi.spyOn(fs, 'readFileSync');

describe('parseYaml', () => {
  test('malformed configuration', () => {
    readFileSync.mockReturnValue(``);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('malformed configuration file.');
  });

  test('missing application property', () => {
    readFileSync.mockReturnValue(`
wrong:
`);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('malformed configuration file: missing version');
  });

  test('version mismatch', () => {
    readFileSync.mockReturnValue(`
version: unknown
application: true
`);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('malformed configuration file: version not supported, got unknown expected v1.0.');
  });

  test('application primitive', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application: true
`);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('AIConfig has bad formatting: application does not have valid container property');
  });

  test('containers not an array', () => {
    // containers is a mapping here, not a sequence
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    name: container1
    contextdir: /path/to/dir1
    arch: ["x86"]
    model-service: true
    gpu-env: ["env1", "env2"]
    ports: [ 8080 ]
`);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('AIConfig has bad formatting: containers property must be an array.');
  });

  test('containers object', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers: true
`);
    expect(() => {
      parseYamlFile(mockYamlPath, defaultArch);
    }).toThrowError('AIConfig has bad formatting: containers property must be an array.');
  });

  test('should use architecture as string', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      arch: x86
      ports: [ 8080 ]
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: ['x86'],
            gpu_env: [],
            modelService: false,
            ports: [8080],
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });

  test('should use all architectures', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      arch: ['arch1', 'arch2']
      ports: [ 8080 ]
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: ['arch1', 'arch2'],
            gpu_env: [],
            modelService: false,
            ports: [8080],
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });

  test('should put the default architecture', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      ports: [ 8080 ]
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: [defaultArch],
            gpu_env: [],
            modelService: false,
            ports: [8080],
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });

  test('should use the image provided in the config', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      ports: [ 8080 ]
      image: dummy-image
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: [defaultArch],
            gpu_env: [],
            modelService: false,
            ports: [8080],
            image: 'dummy-image',
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });

  test('ports should always be a final number', () => {
    // YAML strings like '8080' must be parsed into numbers
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      ports: [ '8080', 8888 ]
      image: dummy-image
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: [defaultArch],
            gpu_env: [],
            modelService: false,
            ports: [8080, 8888],
            image: 'dummy-image',
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });

  test('should use gpu env', () => {
    readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
  containers:
    - name: container1
      contextdir: /path/to/dir1
      arch: ["x86"]
      model-service: true
      gpu-env: ["env1", "env2"]
      ports: [ 8080 ]
    - name: container2
      arch: ["arm"]
      ports: [ 8001 ]
`);
    const expectedConfig: AIConfig = {
      version: AIConfigFormat.CURRENT,
      application: {
        containers: [
          {
            name: 'container1',
            contextdir: '/path/to/dir1',
            arch: ['x86'],
            modelService: true,
            gpu_env: ['env1', 'env2'],
            ports: [8080],
          },
          {
            name: 'container2',
            contextdir: '.',
            arch: ['arm'],
            modelService: false,
            gpu_env: [],
            ports: [8001],
          },
        ],
      },
    };
    expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
  });
});

================================================
FILE:
packages/backend/src/models/AIConfig.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import * as jsYaml from 'js-yaml'; import fs from 'node:fs'; export interface ContainerConfig { name: string; contextdir: string; containerfile?: string; arch: string[]; modelService: boolean; gpu_env: string[]; ports?: number[]; image?: string; backend?: string[]; } export enum AIConfigFormat { CURRENT = 'v1.0', } export interface AIConfig { version: AIConfigFormat; application: { containers: ContainerConfig[]; }; } export interface AIConfigFile { aiConfig: AIConfig; path: string; } export function isString(value: unknown): value is string { return (!!value && typeof value === 'string') || value instanceof String; } export function assertString(value: unknown): string { if (isString(value)) return value; throw new Error('value not a string'); } export function parseYamlFile(filepath: string, defaultArch: string): AIConfig { const raw: string = fs.readFileSync(filepath, 'utf-8'); const aiLabConfig: unknown = jsYaml.load(raw); if (!aiLabConfig || typeof aiLabConfig !== 'object') { throw new Error('malformed configuration file.'); } if (!('version' in aiLabConfig) || typeof aiLabConfig.version !== 'string') throw new 
Error('malformed configuration file: missing version'); if (aiLabConfig.version !== AIConfigFormat.CURRENT) throw new Error( `malformed configuration file: version not supported, got ${aiLabConfig.version} expected ${AIConfigFormat.CURRENT}.`, ); if (!('application' in aiLabConfig)) { throw new Error('malformed configuration file: missing application property'); } const application: unknown = aiLabConfig['application']; if (!application || typeof application !== 'object' || !('containers' in application)) { throw new Error('AIConfig has bad formatting: application does not have valid container property'); } if (!Array.isArray(application['containers'])) { throw new Error('AIConfig has bad formatting: containers property must be an array.'); } const containers: unknown[] = application['containers']; return { version: AIConfigFormat.CURRENT, application: { containers: containers.map(container => { if (!container || typeof container !== 'object') throw new Error('containers array malformed'); let contextdir: string; if ('contextdir' in container) { contextdir = assertString(container['contextdir']); } else { contextdir = '.'; } const architectures: string[] = []; if (!('arch' in container)) { architectures.push(defaultArch); } else if (Array.isArray(container['arch']) && container['arch'].every(arch => typeof arch === 'string')) { architectures.push(...container['arch']); } else if (typeof container['arch'] === 'string') { architectures.push(container['arch']); } else { throw new Error('malformed arch property'); } let containerfile: string | undefined = undefined; if ('containerfile' in container && isString(container['containerfile'])) { containerfile = container['containerfile']; } if (!('name' in container) || typeof container['name'] !== 'string') { throw new Error('invalid name property: must be string'); } return { arch: architectures, modelService: 'model-service' in container && container['model-service'] === true, containerfile, contextdir: contextdir, name: 
container['name'], gpu_env: 'gpu-env' in container && Array.isArray(container['gpu-env']) ? container['gpu-env'] : [], ports: 'ports' in container && Array.isArray(container['ports']) ? container['ports'].map(port => parseInt(port)) : [], image: 'image' in container && isString(container['image']) ? container['image'] : undefined, backend: 'backend' in container && Array.isArray(container['backend']) ? container['backend'] : undefined, }; }), }, }; } ================================================ FILE: packages/backend/src/models/ApplicationOptions.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerProviderConnection } from '@podman-desktop/api'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { Recipe, RecipeDependencies } from '@shared/models/IRecipe'; export type ApplicationOptions = ApplicationOptionsDefault | ApplicationOptionsWithModelInference; export interface ApplicationOptionsDefault { connection: ContainerProviderConnection; recipe: Recipe; dependencies?: RecipeDependencies; } export type ApplicationOptionsWithModelInference = ApplicationOptionsDefault & { model: ModelInfo; }; export function isApplicationOptionsWithModelInference( options: ApplicationOptions, ): options is ApplicationOptionsWithModelInference { return 'model' in options; } ================================================ FILE: packages/backend/src/models/HuggingFaceModelHandler.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { EventEmitter } from '@podman-desktop/api';
import type { TelemetryLogger } from '@podman-desktop/api';
import { beforeEach, expect, test, vi } from 'vitest';
import { ModelsManager } from '../managers/modelsManager';
import type { CatalogManager } from '../managers/catalogManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import type { PodmanConnection } from '../managers/podmanConnection';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry';
import { HuggingFaceModelHandler } from './HuggingFaceModelHandler';
import { snapshotDownload } from '@huggingface/hub';
import type { RpcExtension } from '@shared/messages/MessageProxy';

// Replace the Podman Desktop API with a minimal mock: only EventEmitter is used here.
vi.mock('@podman-desktop/api', () => {
  return {
    EventEmitter: vi.fn(),
  };
});

// Mock the Hugging Face hub client so tests never touch network or disk cache.
vi.mock('@huggingface/hub', () => {
  return {
    scanCacheDir: vi.fn(),
    snapshotDownload: vi.fn(),
  };
});

const rpcExtensionMock = {
  fire: vi.fn(),
} as unknown as RpcExtension;

// Catalog serving two static models; onUpdate is a no-op spy.
const catalogManagerMock = {
  getModels(): ModelInfo[] {
    return [
      { id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo,
      { id: 'model-id-2', name: 'model-id-2-model' } as ModelInfo,
    ];
  },
  onUpdate: vi.fn(),
} as unknown as CatalogManager;

const telemetryLogger = {
  logUsage: vi.fn(),
  logError: vi.fn(),
} as unknown as TelemetryLogger;

const taskRegistry: TaskRegistry = new TaskRegistry(rpcExtensionMock);

const cancellationTokenRegistryMock = {
  createCancellationTokenSource: vi.fn(),
} as unknown as CancellationTokenRegistry;

const podmanConnectionMock = {
  getContainerProviderConnections: vi.fn(),
} as unknown as PodmanConnection;

const configurationRegistryMock = {
  getExtensionConfiguration: vi.fn(),
} as unknown as ConfigurationRegistry;

const modelHandlerRegistry = new ModelHandlerRegistry(rpcExtensionMock);

// Real ModelsManager wired with mocks; the handler under test requires one.
const modelsManager: ModelsManager = new ModelsManager(
  rpcExtensionMock,
  catalogManagerMock,
  telemetryLogger,
  taskRegistry,
  cancellationTokenRegistryMock,
  podmanConnectionMock,
  configurationRegistryMock,
  modelHandlerRegistry,
);

const huggingFaceModelHandler = new HuggingFaceModelHandler(modelsManager);

beforeEach(() => {
  // Wire the EventEmitter mock so fire() synchronously notifies registered listeners.
  const listeners: ((value: unknown) => void)[] = [];

  const eventReturned = {
    event: vi.fn(),
    fire: vi.fn(),
  };
  vi.mocked(EventEmitter).mockReturnValue(eventReturned as unknown as EventEmitter);
  vi.mocked(eventReturned.event).mockImplementation(callback => {
    listeners.push(callback);
  });
  vi.mocked(eventReturned.fire).mockImplementation((content: unknown) => {
    listeners.forEach(listener => listener(content));
  });
});

test('check http url are not supported', () => {
  expect(huggingFaceModelHandler.accept('http://example.com')).toBe(false);
});

test('check https url are not supported', () => {
  expect(huggingFaceModelHandler.accept('http://example.com')).toBe(false);
});

test('check huggingface url are supported', () => {
  expect(huggingFaceModelHandler.accept('huggingface://ibm-granite/my-model')).toBe(true);
});

test('download reports error', async () => {
  // snapshotDownload rejecting must surface as an 'error' event and a rethrow.
  vi.mocked(snapshotDownload).mockRejectedValue(new Error('error'));
  const listenerMock = vi.fn();
  const downloader = huggingFaceModelHandler.createDownloader(
    { id: 'model-id-1', name: 'model-id-1-model', url: 'huggingface://ibm-granite/my-model' } as ModelInfo,
    { aborted: false } as AbortSignal,
  );
  downloader.onEvent(listenerMock);
  let err: unknown;
  try {
    await downloader.perform('model-id-1');
  } catch (error) {
    err = error;
  }
  expect(err).toBeDefined();
  expect(listenerMock).toHaveBeenCalledWith({
    id: 'model-id-1',
    message: 'Something went wrong: Error: error.',
    status: 'error',
  });
});

test('download returns cache in path', async () => {
  // On success the downloader target is the cache path returned by the hub client.
  vi.mocked(snapshotDownload).mockResolvedValue('cache-path');
  const listenerMock = vi.fn();
  const downloader = huggingFaceModelHandler.createDownloader(
    { id: 'model-id-1', name: 'model-id-1-model', url: 'huggingface://ibm-granite/my-model' } as ModelInfo,
    { aborted: false } as AbortSignal,
  );
  downloader.onEvent(listenerMock);
  await downloader.perform('model-id-1');
  expect(downloader.getTarget()).toBe('cache-path');
  expect(listenerMock).toHaveBeenCalledWith({
    duration: expect.anything(),
    id: 'model-id-1',
    message: expect.anything(),
    status: 'completed',
  });
});

================================================
FILE: packages/backend/src/models/HuggingFaceModelHandler.ts
================================================
/**********************************************************************
 * Copyright (C) 2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { ModelHandler } from './ModelHandler'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { Downloader } from '../utils/downloader'; import { scanCacheDir, snapshotDownload } from '@huggingface/hub'; import type { CompletionEvent } from './baseEvent'; import { getDurationSecondsSince } from '../utils/utils'; import type { ModelsManager } from '../managers/modelsManager'; import fs from 'node:fs/promises'; function parseURL(url: string): { repo: string; revision?: string } | undefined { const u = URL.parse(url); if (u) { return { repo: u.pathname.slice(1), revision: u.searchParams.get('revision') ?? 'main' }; } return undefined; } class HuggingFaceDownloader extends Downloader { #target: string = ''; constructor( url: string, private repo: string, private revision: string | undefined, private abortSignal: AbortSignal, ) { super(url, ''); } override getTarget(): string { return this.#target; } async perform(id: string): Promise { const startTime = performance.now(); try { this.#target = await snapshotDownload({ repo: this.repo, revision: this.revision, }); const durationSeconds = getDurationSecondsSince(startTime); this._onEvent.fire({ id: id, status: 'completed', message: `Duration ${durationSeconds}s.`, duration: durationSeconds, } as CompletionEvent); } catch (err: unknown) { if (!this.abortSignal?.aborted) { this._onEvent.fire({ id: id, status: 'error', message: `Something went wrong: ${String(err)}.`, }); } else { this._onEvent.fire({ id: id, status: 'canceled', message: `Request cancelled: ${String(err)}.`, }); } throw err; } finally { this.completed = true; } } } export class HuggingFaceModelHandler extends ModelHandler { constructor(modelsManager: ModelsManager) { super('huggingface model registry', modelsManager); } accept(url: string): boolean { return url.startsWith('huggingface') || url.startsWith('hf'); } 
createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader { const result = parseURL(model.url!); if (result) { return new HuggingFaceDownloader(model.url!, result.repo, result.revision, abortSignal); } throw new Error(`Invalid URL: ${model.url} for model ${model.name}`); } async deleteModel(model: ModelInfo): Promise { if (model.file) { await fs.rm(model.file?.path, { recursive: true }); } else { throw new Error(`Model ${model.name} not downloaded yet.`); } } dispose(): void {} async getLocalModelsFromDisk(): Promise { const hfModels = this.modelsManager .getModelsInfo() .filter(model => model.url && this.accept(model.url)) .map(model => { return { model: model, repo: parseURL(model.url!) }; }) .filter(info => info.repo); scanCacheDir() .then(hfinfo => { for (const repo of hfinfo.repos) { for (const revision of repo.revisions) { for (const ref of revision.refs) { const model = hfModels.find(m => m.repo?.repo === repo.id.name && m.repo?.revision === ref); if (model) { model.model.file = { path: revision.path, file: '', creation: revision.lastModifiedAt, size: revision.size, }; } } } } }) .catch((err: unknown): void => { console.error('Something went wrong while scanning cache.', err); }); } } ================================================ FILE: packages/backend/src/models/ModelHandler.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { Disposable } from '@podman-desktop/api'; import { EventEmitter } from '@podman-desktop/api'; import type { Downloader } from '../utils/downloader'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { ModelsManager } from '../managers/modelsManager'; export abstract class ModelHandler implements Disposable { readonly name: string; readonly modelsManager: ModelsManager; protected _onUpdate = new EventEmitter(); readonly onUpdate = this._onUpdate.event; protected constructor(name: string, modelsManager: ModelsManager) { this.name = name; this.modelsManager = modelsManager; } /** * Releases any resources held by the model handler. */ abstract dispose(): void; /** * Returns true if the model handler can handle the given URL. * @param url */ abstract accept(url: string): boolean; /** * Creates a downloader for the given model. * @param model the model to download * @param abortSignal the signal to abort the download */ abstract createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader; /** * Retrieves the local models from disk. */ abstract getLocalModelsFromDisk(): Promise; /** * Deletes the given model from local storage. * @param model the model */ abstract deleteModel(model: ModelInfo): Promise; } ================================================ FILE: packages/backend/src/models/TaskRunner.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ export interface RunAsTaskOptions { loadingLabel: string; // label set when the task terminates normally, by default the loading label is kept successLabel?: string; // label set when the task terminates in error, by default the loading label is kept errorLabel?: string; // the error message to display when task terminates in error errorMsg: (err: unknown) => string; // if true, all subtasks (tasks found with the same labels) will be immediately marked in error if this task fails failFastSubtasks?: boolean; } export interface TaskRunnerTools { updateLabels: (f: (labels: Record) => Record) => void; } ================================================ FILE: packages/backend/src/models/URLModelHandler.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import fs from 'node:fs'; import { basename, join, resolve } from 'node:path'; import type { FileSystemWatcher } from '@podman-desktop/api'; import { fs as apiFs } from '@podman-desktop/api'; import { ModelHandler } from './ModelHandler'; import type { ModelsManager } from '../managers/modelsManager'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { Downloader } from '../utils/downloader'; import { URLDownloader } from '../utils/urldownloader'; export class URLModelHandler extends ModelHandler { #watcher: FileSystemWatcher; constructor( modelsManager: ModelsManager, private modelsDir: string, ) { super('url model registry', modelsManager); this.#watcher = apiFs.createFileSystemWatcher(this.modelsDir); this.#watcher.onDidCreate(() => this._onUpdate.fire()); this.#watcher.onDidDelete(() => this._onUpdate.fire()); this.#watcher.onDidChange(() => this._onUpdate.fire()); } override dispose(): void { this.#watcher.dispose(); } override accept(url: string): boolean { return url.startsWith('https') || url.startsWith('http') || url.startsWith('file'); } override createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader { const destDir = join(this.modelsDir, model.id); const target = resolve(destDir, basename(model.url!)); return new URLDownloader(model.url!, target, model.sha256, abortSignal); } override async getLocalModelsFromDisk(): Promise { if (!fs.existsSync(this.modelsDir)) { return; } const entries = await fs.promises.readdir(this.modelsDir, { withFileTypes: true }); const dirs = entries.filter(dir => dir.isDirectory()); for (const d of dirs) { const modelEntries = await fs.promises.readdir(resolve(d.parentPath, d.name)); if (modelEntries.length !== 1) { // we support models with one file only for now continue; } const modelFile = modelEntries[0]; const fullPath = resolve(d.parentPath, d.name, modelFile); // Check 
for corresponding models or tmp file that should be ignored try { const model = this.modelsManager.getModelInfo(d.name); if (fullPath.endsWith('.tmp')) { continue; } let info: { size?: number; mtime?: Date } = { size: undefined, mtime: undefined }; try { info = await fs.promises.stat(fullPath); } catch (err: unknown) { console.error('Something went wrong while getting file stats (probably in use).', err); } model.file = { file: modelFile, path: resolve(d.parentPath, d.name), size: info.size, creation: info.mtime, }; } catch (e: unknown) { console.warn(`Can't find model info for local folder ${d.name}.`, e); } } } async deleteModel(model: ModelInfo): Promise { const folder = resolve(this.modelsDir, model.id); await fs.promises.rm(folder, { recursive: true, force: true, maxRetries: 3 }); } } ================================================ FILE: packages/backend/src/models/baseEvent.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ export interface BaseEvent { id: string; status: 'error' | 'completed' | 'progress' | 'canceled'; message?: string; } export interface CompletionEvent extends BaseEvent { status: 'completed' | 'error' | 'canceled'; duration: number; } export interface ProgressEvent extends BaseEvent { status: 'progress'; value: number; total: number; } export const isCompletionEvent = (value: unknown): value is CompletionEvent => { return ( !!value && typeof value === 'object' && 'status' in value && typeof value['status'] === 'string' && ['canceled', 'completed', 'error'].includes(value['status']) ); }; export const isProgressEvent = (value: unknown): value is ProgressEvent => { return ( !!value && typeof value === 'object' && 'status' in value && value['status'] === 'progress' && 'value' in value ); }; ================================================ FILE: packages/backend/src/registries/ApplicationRegistry.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RecipeModelIndex } from '@shared/models/IRecipeModelIndex'; export class ApplicationRegistry { #applications = new Map(); keys(): RecipeModelIndex[] { return Array.from(this.#applications.values()).map(a => ({ recipeId: a.recipeId, modelId: a.modelId })); } has(recipeModel: RecipeModelIndex): boolean { return this.#applications.has(this.hash(recipeModel)); } delete(recipeModel: RecipeModelIndex): boolean { return this.#applications.delete(this.hash(recipeModel)); } values(): IterableIterator { return this.#applications.values(); } get(recipeModel: RecipeModelIndex): T { const application = this.#applications.get(this.hash(recipeModel)); if (!application) throw new Error('application not found.'); return application; } set(recipeModel: RecipeModelIndex, value: T): void { this.#applications.set(this.hash(recipeModel), value); } clear(): void { this.#applications.clear(); } private hash(recipeModel: RecipeModelIndex): string { return recipeModel.recipeId + recipeModel.modelId; } } ================================================ FILE: packages/backend/src/registries/CancellationTokenRegistry.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, vi } from 'vitest'; import { CancellationTokenRegistry } from './CancellationTokenRegistry'; import { CancellationTokenSource, EventEmitter } from '@podman-desktop/api'; vi.mock('@podman-desktop/api', async () => { return { EventEmitter: vi.fn(), CancellationTokenSource: vi.fn(), }; }); beforeEach(() => { vi.resetAllMocks(); // mock event emitters const listeners: ((value: unknown) => void)[] = []; vi.mocked(EventEmitter).mockReturnValue({ event: vi.fn().mockImplementation(callback => { listeners.push(callback); }), dispose: vi.fn(), fire: vi.fn().mockImplementation((content: unknown) => { listeners.forEach(listener => listener(content)); }), } as unknown as EventEmitter); vi.mocked(CancellationTokenSource).mockReturnValue({ cancel: vi.fn(), dispose: vi.fn(), token: { isCancellationRequested: false, onCancellationRequested: vi.fn(), }, }); }); test('created token should be retrievable', () => { const registry = new CancellationTokenRegistry(); const tokenId = registry.createCancellationTokenSource(); expect(tokenId).toBeDefined(); expect(registry.hasCancellationTokenSource(tokenId)).toBeTruthy(); }); test('created token should not be cancelled', () => { const registry = new CancellationTokenRegistry(); const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource()); expect(source).toBeDefined(); expect(source?.token.isCancellationRequested).toBeFalsy(); }); test('cancel token should be removed from registry', () => { const registry = new CancellationTokenRegistry(); const tokenId = registry.createCancellationTokenSource(); expect(registry.hasCancellationTokenSource(tokenId)).toBeTruthy(); registry.cancel(tokenId); expect(registry.hasCancellationTokenSource(tokenId)).toBeFalsy(); }); test('disposing registry should dispose with cancel all tokens', () => { const registry = new 
CancellationTokenRegistry(); const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource()); registry.dispose(); expect(source?.cancel).toHaveBeenCalled(); expect(source?.dispose).toHaveBeenCalled(); }); test('creating cancellation token with function should register it', () => { const registry = new CancellationTokenRegistry(); const func = vi.fn(); const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource(func)); expect(source?.token.onCancellationRequested).toHaveBeenCalledWith(func); }); ================================================ FILE: packages/backend/src/registries/CancellationTokenRegistry.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { CancellationTokenSource, type Disposable } from '@podman-desktop/api'; export class CancellationTokenRegistry implements Disposable { #callbackId: number; #callbacksCancellableToken: Map; constructor() { this.#callbackId = 0; this.#callbacksCancellableToken = new Map(); } /** * Creating a cancellation token. 
   * @param func an optional function that will be called when the cancel action will be triggered
   */
  createCancellationTokenSource(func?: () => void): number {
    // keep track of this request
    this.#callbackId++;

    const token = new CancellationTokenSource();
    if (func !== undefined) {
      token.token.onCancellationRequested(func);
    }

    // store the callback that will resolve the promise
    this.#callbacksCancellableToken.set(this.#callbackId, token);

    return this.#callbackId;
  }

  getCancellationTokenSource(id: number): CancellationTokenSource | undefined {
    if (this.hasCancellationTokenSource(id)) {
      return this.#callbacksCancellableToken.get(id);
    }
    return undefined;
  }

  hasCancellationTokenSource(id: number): boolean {
    return this.#callbacksCancellableToken.has(id);
  }

  // Cancels the token with the given id and removes it from the registry.
  cancel(tokenId: number): void {
    if (!this.hasCancellationTokenSource(tokenId))
      throw new Error(`Cancellation token with id ${tokenId} does not exist.`);

    this.getCancellationTokenSource(tokenId)?.cancel();
    this.delete(tokenId);
  }

  delete(tokenId: number): void {
    this.#callbacksCancellableToken.delete(tokenId);
  }

  // Cancels and disposes every remaining token source, then empties the map.
  dispose(): void {
    Array.from(this.#callbacksCancellableToken.values()).forEach(source => {
      source.cancel();
      source.dispose();
    });
    this.#callbacksCancellableToken.clear();
  }
}

================================================
FILE: packages/backend/src/registries/ConfigurationRegistry.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024-2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { vi, expect, test } from 'vitest';
import { configuration, type Configuration } from '@podman-desktop/api';
import { ConfigurationRegistry } from './ConfigurationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';

// Fake configuration store shared across tests; returned by the mocked API below.
const fakeConfiguration = {
  get: vi.fn(),
  has: vi.fn(),
  update: vi.fn(),
} as unknown as Configuration;

const rpcExtensionMock = {
  fire: vi.fn().mockResolvedValue(true),
} as unknown as RpcExtension;

vi.mock('@podman-desktop/api', async () => {
  return {
    configuration: {
      getConfiguration: (): unknown => fakeConfiguration,
      onDidChangeConfiguration: vi.fn(),
    },
  };
});

test('init should init listener', () => {
  const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
  vi.mocked(fakeConfiguration.has).mockReturnValue(true);
  registry.init();
  expect(configuration.onDidChangeConfiguration).toHaveBeenCalled();
});

test('dispose should dispose listener', () => {
  const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
  vi.mocked(fakeConfiguration.has).mockReturnValue(true);
  const disposeMock = vi.fn();
  vi.mocked(configuration.onDidChangeConfiguration).mockReturnValue({ dispose: disposeMock });
  registry.init();
  expect(configuration.onDidChangeConfiguration).toHaveBeenCalled();
  registry.dispose();
  expect(disposeMock).toHaveBeenCalled();
});

test('update should trigger configuration update', async () => {
  const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
  vi.mocked(fakeConfiguration.has).mockReturnValue(true);
  vi.mocked(fakeConfiguration.update).mockResolvedValue(undefined);
  registry.init();
  await registry.updateExtensionConfiguration({ modelsPath: '' });
  expect(fakeConfiguration.update).toHaveBeenCalledWith('models.path', '');
});
================================================
FILE: packages/backend/src/registries/ConfigurationRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { configuration, version, type Configuration, type Disposable } from '@podman-desktop/api';
import { Publisher } from '../utils/Publisher';
import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration';
import { MSG_CONFIGURATION_UPDATE } from '@shared/Messages';
import path from 'node:path';
import type { RpcExtension } from '@shared/messages/MessageProxy';

// Configuration sections watched and exposed by this registry.
const CONFIGURATION_SECTIONS: string[] = [
  'models.path',
  'experimentalGPU',
  'apiPort',
  'inferenceRuntime',
  'experimentalTuning',
  'modelUploadDisabled',
  'showGPUPromotion',
  'appearance',
];

const API_PORT_DEFAULT = 10434;

/**
 * Exposes the extension configuration, publishes change notifications, and
 * writes updates back to the Podman Desktop configuration store.
 */
export class ConfigurationRegistry extends Publisher<ExtensionConfiguration> implements Disposable {
  #configuration: Configuration;
  #configurationPodmanDesktop: Configuration;
  #configurationDisposable: Disposable | undefined;

  constructor(
    rpcExtension: RpcExtension,
    private appUserDirectory: string,
  ) {
    super(rpcExtension, MSG_CONFIGURATION_UPDATE, () => this.getExtensionConfiguration());
    this.#configuration = configuration.getConfiguration('ai-lab');
    this.#configurationPodmanDesktop = configuration.getConfiguration('preferences');
  }

  /** Returns the current configuration values with defaults applied. */
  getExtensionConfiguration(): ExtensionConfiguration {
    return {
      modelsPath: this.getModelsPath(),
      experimentalGPU: this.#configuration.get<boolean>('experimentalGPU') ?? false,
      apiPort: this.#configuration.get<number>('apiPort') ?? API_PORT_DEFAULT,
      inferenceRuntime: this.#configuration.get<string>('inferenceRuntime') ?? 'all',
      experimentalTuning: this.#configuration.get<boolean>('experimentalTuning') ?? false,
      modelUploadDisabled: this.#configuration.get<boolean>('modelUploadDisabled') ?? false,
      showGPUPromotion: this.#configuration.get<boolean>('showGPUPromotion') ?? true,
      appearance: this.#configurationPodmanDesktop.get<string>('appearance') ?? 'dark',
    };
  }

  getPodmanDesktopVersion(): string {
    return version;
  }

  // Maps a configuration section ('models.path') to its camelCase field name ('modelsPath').
  private getFieldName(section: string): keyof Partial<ExtensionConfiguration> {
    return section.replace(/\.(\w)/, (_match, char: string) =>
      char.toUpperCase(),
    ) as keyof Partial<ExtensionConfiguration>;
  }

  /** Persists the provided fields into the configuration store, then notifies. */
  async updateExtensionConfiguration(update: Partial<ExtensionConfiguration>): Promise<void> {
    for (const section of CONFIGURATION_SECTIONS) {
      const fieldName = this.getFieldName(section);
      const value = update[fieldName];
      if (value !== undefined) {
        await this.#configuration.update(section, value);
      }
    }
    this.notify(); //https://github.com/containers/podman-desktop/issues/9194
  }

  // Falls back to <appUserDirectory>/models when no custom path is configured.
  private getModelsPath(): string {
    const value = this.#configuration.get<string>('models.path');
    if (value && value.length > 0) {
      return value;
    }
    return path.join(this.appUserDirectory, 'models');
  }

  dispose(): void {
    this.#configurationDisposable?.dispose();
  }

  /** Starts listening for configuration changes affecting the watched sections. */
  init(): void {
    this.#configurationDisposable = configuration.onDidChangeConfiguration(event => {
      if (CONFIGURATION_SECTIONS.some(section => event.affectsConfiguration(`ai-lab.${section}`))) {
        this.notify();
      }
      if (CONFIGURATION_SECTIONS.some(section => event.affectsConfiguration(`preferences.${section}`))) {
        this.notify();
      }
    });
  }
}

================================================
FILE: packages/backend/src/registries/ContainerRegistry.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeAll, expect, test, vi } from 'vitest';
import { ContainerRegistry } from './ContainerRegistry';
import { type ContainerJSONEvent, EventEmitter } from '@podman-desktop/api';
import { TestEventEmitter } from '../tests/utils';

// Hoisted so the vi.mock factory below can reference them.
const mocks = vi.hoisted(() => ({
  onEventMock: vi.fn(),
  DisposableCreateMock: vi.fn(),
}));

vi.mock('@podman-desktop/api', async () => {
  return {
    EventEmitter: vi.fn(),
    Disposable: {
      create: mocks.DisposableCreateMock,
    },
    containerEngine: {
      onEvent: mocks.onEventMock,
    },
  };
});

beforeAll(() => {
  // Back the mocked EventEmitter with a real in-memory implementation so
  // fire()/event subscriptions behave normally in tests.
  vi.mocked(EventEmitter).mockImplementation(() => new TestEventEmitter() as unknown as EventEmitter);
});

test('ContainerRegistry init', () => {
  const registry = new ContainerRegistry();
  registry.init();
  // init() must hook into the container engine's event stream exactly once
  expect(mocks.onEventMock).toHaveBeenCalledOnce();
});

test('ContainerRegistry subscribe', () => {
  // Capture the engine-event callback registered by the ContainerRegistry
  let callback: ((event: ContainerJSONEvent) => void) | undefined;
  mocks.onEventMock.mockImplementation((method: (event: ContainerJSONEvent) => void) => {
    callback = method;
  });

  // Create the ContainerRegistry and init
  const registry = new ContainerRegistry();
  registry.init();

  // Let's create a dummy subscriber
  let subscribedStatus: undefined | string = undefined;
  registry.subscribe('random', (status: string) => {
    subscribedStatus = status;
  });

  if (!callback) throw new Error('undefined callback');

  // Generate a fake event
  callback({
    status: 'die',
    id: 'random',
    type: 'container',
  });

  // the subscriber must have received the container's status
  expect(subscribedStatus).toBe('die');
  expect(mocks.DisposableCreateMock).toHaveBeenCalledOnce();
});

test('ContainerRegistry unsubscribe all if container remove', () => {
  // Capture the engine-event callback registered by the ContainerRegistry
  let callback: ((event: ContainerJSONEvent) => void) | undefined;
  mocks.onEventMock.mockImplementation((method: (event: ContainerJSONEvent) => void) => {
    callback = method;
  });

  // Create the ContainerRegistry and init
  const registry = new ContainerRegistry();
  registry.init();

  // Let's create a dummy subscriber
  const subscribeMock = vi.fn();
  registry.subscribe('random', subscribeMock);

  if (!callback) throw new Error('undefined callback');

  // Generate a remove event
  callback({ status: 'remove', id: 'random', type: 'container' });
  // Call it a second time
  callback({ status: 'remove', id: 'random', type: 'container' });

  // Our subscriber should only have been called once, the first, after it should have been removed.
  expect(subscribeMock).toHaveBeenCalledOnce();
});

test('ContainerRegistry subscriber disposed should not be called', () => {
  // Capture the engine-event callback registered by the ContainerRegistry
  let callback: ((event: ContainerJSONEvent) => void) | undefined;
  mocks.onEventMock.mockImplementation((method: (event: ContainerJSONEvent) => void) => {
    callback = method;
  });
  // Make Disposable.create produce a working disposable that runs its cleanup
  mocks.DisposableCreateMock.mockImplementation(callback => ({
    dispose: (): void => callback(),
  }));

  // Create the ContainerRegistry and init
  const registry = new ContainerRegistry();
  registry.init();

  // Let's create a dummy subscriber, then immediately dispose it
  const subscribeMock = vi.fn();
  const disposable = registry.subscribe('random', subscribeMock);
  disposable.dispose();

  if (!callback) throw new Error('undefined callback');

  // Generate a random event
  callback({ status: 'die', id: 'random', type: 'container' });

  // never should have been called
  expect(subscribeMock).toHaveBeenCalledTimes(0);
});

test('ContainerRegistry should fire ContainerStart when container start', () => {
  // Capture the engine-event callback registered by the ContainerRegistry
  let callback: ((event: ContainerJSONEvent) => void) | undefined;
  mocks.onEventMock.mockImplementation((method: (event: ContainerJSONEvent) => void) => {
    callback = method;
  });

  // Create the ContainerRegistry and init
  const registry = new ContainerRegistry();
  registry.init();

  const startListenerMock = vi.fn();
  registry.onStartContainerEvent(startListenerMock);

  if (!callback) throw new Error('undefined callback');

  // A remove event must not trigger the start listener
  callback({ status: 'remove', id: 'random', type: 'container' });

  expect(startListenerMock).not.toHaveBeenCalled();

  // Now generate a start event
  callback({ status: 'start', id: 'random', type: 'container' });

  // only the 'start' status fires the start listener, exactly once
  expect(startListenerMock).toHaveBeenCalledOnce();
});

test('ContainerRegistry should fire ContainerStop when container stop', () => {
  // Capture the engine-event callback registered by the ContainerRegistry
  let callback: ((event: ContainerJSONEvent) => void) | undefined;
  mocks.onEventMock.mockImplementation((method: (event: ContainerJSONEvent) => void) => {
    callback = method;
  });

  // Create the ContainerRegistry and init
  const registry = new ContainerRegistry();
  registry.init();

  const stopListenerMock = vi.fn();
  registry.onStopContainerEvent(stopListenerMock);

  if (!callback) throw new Error('undefined callback');

  // A remove event must not trigger the stop listener
  callback({ status: 'remove', id: 'random', type: 'container' });

  expect(stopListenerMock).not.toHaveBeenCalled();

  // A start event must not trigger the stop listener either
  callback({ status: 'start', id: 'random', type: 'container' });

  expect(stopListenerMock).not.toHaveBeenCalled();

  // the 'die' status is what fires the stop event
  callback({ status: 'die', id: 'random', type: 'container' });

  expect(stopListenerMock).toHaveBeenCalledOnce();
});

================================================
FILE: packages/backend/src/registries/ContainerRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import * as podmanDesktopApi from '@podman-desktop/api'; export type Subscriber = { id: number; callback: (status: string) => void; }; export interface ContainerEvent { id: string; } export interface ContainerHealthy { id: string; } export class ContainerRegistry implements podmanDesktopApi.Disposable { private count: number = 0; private subscribers: Map = new Map(); private readonly _onStartContainerEvent = new podmanDesktopApi.EventEmitter(); readonly onStartContainerEvent: podmanDesktopApi.Event = this._onStartContainerEvent.event; private readonly _onStopContainerEvent = new podmanDesktopApi.EventEmitter(); readonly onStopContainerEvent: podmanDesktopApi.Event = this._onStopContainerEvent.event; private readonly _onHealthyContainerEvent = new podmanDesktopApi.EventEmitter(); readonly onHealthyContainerEvent: podmanDesktopApi.Event = this._onHealthyContainerEvent.event; #eventDisposable: podmanDesktopApi.Disposable | undefined; init(): void { this.#eventDisposable = podmanDesktopApi.containerEngine.onEvent(event => { if (event.status === 'start') { this._onStartContainerEvent.fire({ id: event.id, }); } else if (event.status === 'die') { this._onStopContainerEvent.fire({ id: event.id, }); } if (event.status === 'health_status' && 'HealthStatus' in event && event.HealthStatus === 'healthy') { this._onHealthyContainerEvent.fire({ id: event.id, }); } if (this.subscribers.has(event.id)) { this.subscribers.get(event.id)?.forEach(subscriber => subscriber.callback(event.status)); // If the event type is remove, we dispose all subscribers for the specific containers if (event.status === 'remove') { this.subscribers.delete(event.id); } } }); } dispose(): void { this.#eventDisposable?.dispose(); } subscribe(containerId: string, callback: (status: string) => void): podmanDesktopApi.Disposable { const subscriberId = ++this.count; const nSubs: Subscriber[] = [ 
...(this.subscribers.get(containerId) ?? []), { id: subscriberId, callback: callback, }, ]; this.subscribers.set(containerId, nSubs); return podmanDesktopApi.Disposable.create(() => { if (!this.subscribers.has(containerId)) return; this.subscribers.set( containerId, nSubs.filter(subscriber => subscriber.id !== subscriberId), ); }); } } ================================================ FILE: packages/backend/src/registries/ConversationRegistry.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { Publisher } from '../utils/Publisher';
import type {
  AssistantChat,
  ChatMessage,
  Conversation,
  Message,
  ModelUsage,
  ToolCall,
} from '@shared/models/IPlaygroundMessage';
import type { Disposable } from '@podman-desktop/api';
import { MSG_CONVERSATIONS_UPDATE } from '@shared/Messages';
import type { RpcExtension } from '@shared/messages/MessageProxy';

/**
 * In-memory store of playground conversations. Every mutation calls notify()
 * so the webview receives the updated conversation list.
 */
export class ConversationRegistry extends Publisher implements Disposable {
  // conversationId => Conversation
  #conversations: Map;
  // monotonically increasing counter backing getUniqueId()
  #counter: number;

  constructor(rpcExtension: RpcExtension) {
    super(rpcExtension, MSG_CONVERSATIONS_UPDATE, () => this.getAll());
    this.#conversations = new Map();
    this.#counter = 0;
  }

  // Ids are unique per registry instance only (plain counter, not a UUID).
  getUniqueId(): string {
    return `${++this.#counter}`;
  }

  /**
   * Remove a message from a conversation
   * @param conversationId
   * @param messageId
   */
  removeMessage(conversationId: string, messageId: string): void {
    const conversation: Conversation = this.get(conversationId);

    conversation.messages = conversation.messages.filter(message => message.id !== messageId);
    this.notify();
  }

  /**
   * Utility method to update a message content in a given conversation
   * @param conversationId
   * @param messageId
   * @param message partial fields to merge into the existing message
   */
  update(conversationId: string, messageId: string, message: Partial): void {
    const conversation: Conversation = this.get(conversationId);

    const messageIndex = conversation.messages.findIndex(message => message.id === messageId);
    if (messageIndex === -1)
      throw new Error(`message with id ${messageId} does not exist in conversation ${conversationId}.`);

    // Update the message with the provided content
    conversation.messages[messageIndex] = {
      ...conversation.messages[messageIndex],
      ...message,
      id: messageId, // preventing we are not updating the id
    };
    this.notify();
  }

  // No-op (besides notify) when the id is unknown: Map.delete tolerates it.
  deleteConversation(id: string): void {
    this.#conversations.delete(id);
    this.notify();
  }

  /** Create an empty conversation for the given model and return its id. */
  createConversation(name: string, modelId: string): string {
    const conversationId = this.getUniqueId();
    this.#conversations.set(conversationId, {
      name: name,
      modelId: modelId,
      messages: [],
      id: conversationId,
      usage: {
        completion_tokens: 0,
        prompt_tokens: 0,
      } as ModelUsage,
    });
    this.notify();
    return conversationId;
  }

  /**
   * This method will be responsible for finalizing the message: it clears any
   * streaming choices, marks the role assistant and stamps the completion time.
   * @param conversationId
   * @param messageId
   */
  completeMessage(conversationId: string, messageId: string): void {
    const conversation: Conversation = this.get(conversationId);

    const messageIndex = conversation.messages.findIndex(message => message.id === messageId);
    if (messageIndex === -1)
      throw new Error(`message with id ${messageId} does not exist in conversation ${conversationId}.`);

    this.update(conversationId, messageId, {
      ...conversation.messages[messageIndex],
      choices: undefined,
      role: 'assistant',
      completed: Date.now(),
    } as AssistantChat);
  }

  /**
   * Utility method to quickly add a usage to a conversation
   * @param conversationId
   * @param usage
   */
  setUsage(conversationId: string, usage: ModelUsage): void {
    const conversation: Conversation = this.get(conversationId);
    this.#conversations.set(conversationId, {
      ...conversation,
      usage,
    });
    this.notify();
  }

  /**
   * Utility method to quickly add a delta to a given a message inside a conversation
   * (appends streamed text to the assistant message's content).
   * @param conversationId
   * @param messageId
   * @param delta
   */
  textDelta(conversationId: string, messageId: string, delta: string): void {
    const conversation: Conversation = this.get(conversationId);
    const messageIndex = conversation.messages.findIndex(message => message.id === messageId);
    if (messageIndex === -1) {
      throw new Error(`message with id ${messageId} does not exist in conversation ${conversationId}.`);
    }
    this.update(conversationId, messageId, {
      ...conversation.messages[messageIndex],
      content: ((conversation.messages[messageIndex] as AssistantChat).content ?? '') + delta,
    } as AssistantChat);
  }

  /**
   * Utility method to quickly add a tool-call assistant message to a conversation:
   * finds the assistant message carrying the matching tool-call and attaches the result.
   */
  toolResult(conversationId: string, toolCallId: string, toolResult: string | object): void {
    const conversation: Conversation = this.get(conversationId);
    const messageIndex = conversation.messages.findIndex(
      message =>
        (message as ChatMessage)?.role === 'assistant' &&
        ((message as AssistantChat).content as ToolCall)?.type === 'tool-call' &&
        ((message as AssistantChat).content as ToolCall)?.toolCallId === toolCallId,
    );
    if (messageIndex === -1) {
      // NOTE(review): "with for" in this runtime error message looks like a typo,
      // left untouched here since it is program output, not a comment.
      throw new Error(`message with for tool call ${toolCallId} does not exist in conversation ${conversationId}.`);
    }
    const content: ToolCall = {
      ...((conversation.messages[messageIndex] as AssistantChat).content as ToolCall),
      result: toolResult,
    };
    this.update(conversationId, conversation.messages[messageIndex].id, {
      ...conversation.messages[messageIndex],
      completed: Date.now(),
      content,
    } as AssistantChat);
  }

  /**
   * Utility method to add a new Message to a given conversation
   * @param conversationId
   * @param message
   */
  submit(conversationId: string, message: Message): void {
    const conversation = this.#conversations.get(conversationId);
    if (conversation === undefined) throw new Error(`conversation with id ${conversationId} does not exist.`);

    this.#conversations.set(conversationId, {
      ...conversation,
      messages: [...conversation.messages, message],
    });
    this.notify();
  }

  // NOTE: dispose clears the store but deliberately does not notify.
  dispose(): void {
    this.#conversations.clear();
  }

  /** Fetch a conversation or throw if the id is unknown. */
  get(conversationId: string): Conversation {
    const conversation: Conversation | undefined = this.#conversations.get(conversationId);
    if (conversation === undefined) throw new Error(`conversation with id ${conversationId} does not exist.`);
    return conversation;
  }

  getAll(): Conversation[] {
    return Array.from(this.#conversations.values());
  }
}

================================================
FILE: packages/backend/src/registries/InferenceProviderRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024-2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { Publisher } from '../utils/Publisher';
import type { InferenceProvider } from '../workers/provider/InferenceProvider';
import { Disposable } from '@podman-desktop/api';
import { MSG_INFERENCE_PROVIDER_UPDATE } from '@shared/Messages';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import type { InferenceType } from '@shared/models/IInference';

/**
 * Keeps track of the available inference providers, keyed by name, and
 * publishes the list of provider names to the webview on every change.
 */
export class InferenceProviderRegistry extends Publisher {
  // provider name => provider
  #providers: Map;

  constructor(rpcExtension: RpcExtension) {
    super(rpcExtension, MSG_INFERENCE_PROVIDER_UPDATE, () => this.getAll().map(provider => provider.name));
    this.#providers = new Map();
  }

  /**
   * Register a provider under its name.
   * @returns a Disposable that unregisters it again.
   */
  register(provider: InferenceProvider): Disposable {
    this.#providers.set(provider.name, provider);
    this.notify();
    return Disposable.create(() => {
      this.unregister(provider.name);
    });
  }

  unregister(name: string): void {
    this.#providers.delete(name);
    // BUGFIX: publish the removal. register() notifies, but unregister did not,
    // leaving subscribers with a stale provider list (the sibling
    // ModelHandlerRegistry.unregister already notifies).
    this.notify();
  }

  getAll(): InferenceProvider[] {
    return Array.from(this.#providers.values());
  }

  /** All registered providers serving the given inference type. */
  getByType(type: InferenceType): InferenceProvider[] {
    return Array.from(this.#providers.values()).filter(provider => provider.type === type);
  }

  /** Fetch a provider by name or throw if it is unknown. */
  get(name: string): InferenceProvider {
    const provider = this.#providers.get(name);
    if (provider === undefined) throw new Error(`no provider with name ${name} was found.`);
    return provider;
  }
}

================================================
FILE: packages/backend/src/registries/LocalRepositoryRegistry.spec.ts
================================================
/**********************************************************************
 * Copyright (C) 2024-2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { LocalRepositoryRegistry } from './LocalRepositoryRegistry';
import type { Recipe } from '@shared/models/IRecipe';
import fs from 'node:fs';
import path from 'node:path';
import type { CatalogManager } from '../managers/catalogManager';
import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_LOCAL_REPOSITORY_UPDATE } from '@shared/Messages';

const mocks = vi.hoisted(() => ({
  DisposableCreateMock: vi.fn(),
}));

vi.mock('@podman-desktop/api', async () => {
  return {
    Disposable: {
      create: mocks.DisposableCreateMock,
    },
  };
});

vi.mock('node:fs', () => {
  return {
    existsSync: vi.fn(),
    promises: {
      rm: vi.fn(),
    },
  };
});

const catalogManagerMock = {
  onUpdate: vi.fn(),
  getRecipes: vi.fn(),
} as unknown as CatalogManager;

const rpcExtensionMock = {
  fire: vi.fn(),
} as unknown as
RpcExtension;

beforeEach(() => {
  vi.resetAllMocks();
  vi.mock('node:fs');
  vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
});

test('should not have any repositories by default', () => {
  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  expect(localRepositories.getLocalRepositories().length).toBe(0);
});

test('should notify webview when register', () => {
  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  localRepositories.register({ path: 'random', sourcePath: 'random', labels: { 'recipe-id': 'random' } });

  // registering must publish the new repository list over RPC
  expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(1, MSG_LOCAL_REPOSITORY_UPDATE, [
    { path: 'random', sourcePath: 'random', labels: { 'recipe-id': 'random' } },
  ]);
});

test('should notify webview when unregister', async () => {
  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  vi.spyOn(fs.promises, 'rm').mockResolvedValue();
  localRepositories.register({ path: 'random', sourcePath: 'random', labels: { 'recipe-id': 'random' } });

  // deleting removes the directory AND publishes the (now empty) list
  await localRepositories.deleteLocalRepository('random');
  expect(rpcExtensionMock.fire).toHaveBeenLastCalledWith(MSG_LOCAL_REPOSITORY_UPDATE, []);
});

test('should register localRepo if it find the folder of the recipe', () => {
  vi.spyOn(fs, 'existsSync').mockReturnValue(true);
  vi.mocked(catalogManagerMock.getRecipes).mockReturnValue([
    {
      id: 'recipe',
    } as unknown as Recipe,
  ]);

  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  const registerMock = vi.spyOn(localRepositories, 'register');
  localRepositories.init();

  // init() scans <appUserDirectory>/<recipe-id> for already-cloned recipes
  const folder = path.join('/appUserDirectory', 'recipe');
  expect(registerMock).toHaveBeenCalledWith({
    path: folder,
    sourcePath: folder,
    labels: { 'recipe-id': 'recipe' },
  });
});

test('should register localRepo when catalog get updated', () => {
  vi.spyOn(fs, 'existsSync').mockReturnValue(true);
  vi.mocked(catalogManagerMock.getRecipes).mockReturnValue([]);
  // capture the catalog listener so the test can fire a catalog update manually
  let listener: ((catalog: ApplicationCatalog) => void) | undefined = undefined;
  vi.mocked(catalogManagerMock.onUpdate).mockImplementation((fn: (catalog: ApplicationCatalog) => void) => {
    listener = fn;
    return { dispose: vi.fn() };
  });

  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  const registerMock = vi.spyOn(localRepositories, 'register');
  localRepositories.init();

  const folder = path.join('/appUserDirectory', 'recipe');
  // the (empty) initial catalog must not register anything yet
  expect(registerMock).not.toHaveBeenCalled();
  expect(listener).toBeDefined();
  if (!listener) throw new Error('undefined listener');

  (listener as (catalog: ApplicationCatalog) => void)({
    recipes: [
      {
        id: 'recipe',
      } as unknown as Recipe,
    ],
    models: [],
    categories: [],
  });

  expect(registerMock).toHaveBeenCalledWith({
    path: folder,
    sourcePath: folder,
    labels: { 'recipe-id': 'recipe' },
  });
});

test('should NOT register localRepo if it does not find the folder of the recipe', () => {
  vi.spyOn(fs, 'existsSync').mockReturnValue(false);
  vi.mocked(catalogManagerMock.getRecipes).mockReturnValue([
    {
      id: 'recipe',
    } as unknown as Recipe,
  ]);

  const localRepositories = new LocalRepositoryRegistry(rpcExtensionMock, '/appUserDirectory', catalogManagerMock);
  const registerMock = vi.spyOn(localRepositories, 'register');
  localRepositories.init();

  expect(registerMock).not.toHaveBeenCalled();
});

================================================
FILE: packages/backend/src/registries/LocalRepositoryRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { LocalRepository } from '@shared/models/ILocalRepository'; import { Disposable } from '@podman-desktop/api'; import { Publisher } from '../utils/Publisher'; import type { Recipe } from '@shared/models/IRecipe'; import fs from 'node:fs'; import path from 'node:path'; import type { CatalogManager } from '../managers/catalogManager'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_LOCAL_REPOSITORY_UPDATE } from '@shared/Messages'; /** * The LocalRepositoryRegistry is responsible for keeping track of the directories where recipe are cloned */ export class LocalRepositoryRegistry extends Publisher implements Disposable { // Map path => LocalRepository private repositories: Map = new Map(); #catalogEventDisposable: Disposable | undefined; constructor( rpcExtension: RpcExtension, private appUserDirectory: string, private catalogManager: CatalogManager, ) { super(rpcExtension, MSG_LOCAL_REPOSITORY_UPDATE, () => this.getLocalRepositories()); } dispose(): void { this.#catalogEventDisposable?.dispose(); } init(): void { this.#catalogEventDisposable = this.catalogManager.onUpdate(({ recipes }) => { this.loadLocalRecipeRepositories(recipes); }); this.loadLocalRecipeRepositories(this.catalogManager.getRecipes()); } register(localRepository: LocalRepository): Disposable { this.repositories.set(localRepository.path, localRepository); this.notify(); return Disposable.create(() => { this.unregister(localRepository.path); }); } 
unregister(path: string): void { this.repositories.delete(path); this.notify(); } async deleteLocalRepository(path: string): Promise { await fs.promises.rm(path, { recursive: true, force: true, maxRetries: 3 }); // once it has been removed, it also update the localRepo list this.unregister(path); } getLocalRepositories(): LocalRepository[] { return Array.from(this.repositories.values()); } private loadLocalRecipeRepositories(recipes: Recipe[]): void { recipes.forEach(recipe => { const recipeFolder = path.join(this.appUserDirectory, recipe.id); if (fs.existsSync(recipeFolder)) { this.register({ path: recipeFolder, sourcePath: path.join(recipeFolder, recipe.basedir ?? ''), labels: { 'recipe-id': recipe.id, }, }); } }); } } ================================================ FILE: packages/backend/src/registries/ModelHandlerRegistry.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { Publisher } from '../utils/Publisher'; import { Disposable } from '@podman-desktop/api'; import { MSG_MODEL_HANDLERS_UPDATE } from '@shared/Messages'; import type { ModelHandler } from '../models/ModelHandler'; import type { RpcExtension } from '@shared/messages/MessageProxy'; export class ModelHandlerRegistry extends Publisher { #providers: Map; constructor(rpcExtension: RpcExtension) { super(rpcExtension, MSG_MODEL_HANDLERS_UPDATE, () => this.getAll().map(provider => provider.name)); this.#providers = new Map(); } register(provider: ModelHandler): Disposable { this.#providers.set(provider.name, provider); this.notify(); return Disposable.create(() => { this.unregister(provider); }); } unregister(provider: ModelHandler): void { this.#providers.delete(provider.name); this.notify(); } getAll(): ModelHandler[] { return Array.from(this.#providers.values()); } findModelHandler(url: string): ModelHandler | undefined { return Array.from(this.#providers.values()).find(modelHandler => modelHandler.accept(url)); } } ================================================ FILE: packages/backend/src/registries/NavigationRegistry.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeAll, afterAll, beforeEach, describe, expect, test, vi } from 'vitest';
import { commands, navigation, type WebviewPanel, type Disposable } from '@podman-desktop/api';
import { NavigationRegistry } from './NavigationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_NAVIGATION_ROUTE_UPDATE } from '@shared/Messages';

vi.mock('@podman-desktop/api', async () => ({
  commands: {
    registerCommand: vi.fn(),
  },
  navigation: {
    register: vi.fn(),
  },
}));

// Stub webview panel: navigation methods reveal it and post a route update.
const panelMock: WebviewPanel = {
  reveal: vi.fn(),
  webview: {
    postMessage: vi.fn(),
  },
} as unknown as WebviewPanel;

const rpcExtensionMock = {
  fire: vi.fn(),
} as unknown as RpcExtension;

beforeEach(() => {
  vi.resetAllMocks();
  vi.restoreAllMocks();
});

// Simulate an older podman-desktop whose API has no navigation.register.
describe('incompatible podman-desktop', () => {
  let register: typeof navigation.register | undefined;
  beforeAll(() => {
    // stash the mocked function so it can be restored afterAll
    register = navigation.register;
    (navigation.register as unknown as undefined) = undefined;
  });
  afterAll(() => {
    if (!register) return;
    navigation.register = register;
  });

  test('init should not register command and navigation when using old version of podman', () => {
    (navigation.register as unknown as undefined) = undefined;
    const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
    registry.init();
    // init() bails out early, so no command is registered either
    expect(commands.registerCommand).not.toHaveBeenCalled();
  });
});

test('init should register command and navigation', () => {
  const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
  registry.init();
  expect(commands.registerCommand).toHaveBeenCalled();
  expect(navigation.register).toHaveBeenCalled();
});

test('dispose should dispose all command and navigation registered', () => {
  const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
  const disposables: Disposable[] = [];
  vi.mocked(commands.registerCommand).mockImplementation(() => {
    const disposable: Disposable = {
      dispose: vi.fn(),
    };
    disposables.push(disposable);
    return disposable;
  });
  vi.mocked(navigation.register).mockImplementation(() => {
    const disposable: Disposable = {
      dispose: vi.fn(),
    };
    disposables.push(disposable);
    return disposable;
  });
  // NOTE(review): init() is never called here, so `disposables` stays empty and
  // the assertions below never execute — consider calling registry.init() after
  // installing the mock implementations so this test actually exercises dispose().
  registry.dispose();
  disposables.forEach((disposable: Disposable) => {
    expect(disposable.dispose).toHaveBeenCalledOnce();
  });
});

test('navigateToInferenceCreate should reveal and postMessage to webview', async () => {
  const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
  await registry.navigateToInferenceCreate('dummyTrackingId');

  await vi.waitFor(() => {
    expect(panelMock.reveal).toHaveBeenCalledOnce();
  });

  // the route carries the tracking id as a query parameter
  expect(rpcExtensionMock.fire).toHaveBeenCalledWith(
    MSG_NAVIGATION_ROUTE_UPDATE,
    '/service/create?trackingId=dummyTrackingId',
  );
});

test('navigateToRecipeStart should reveal and postMessage to webview', async () => {
  const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
  await registry.navigateToRecipeStart('dummyRecipeId', 'dummyTrackingId');

  await vi.waitFor(() => {
    expect(panelMock.reveal).toHaveBeenCalledOnce();
  });

  expect(rpcExtensionMock.fire).toHaveBeenCalledWith(
    MSG_NAVIGATION_ROUTE_UPDATE,
    '/recipe/dummyRecipeId/start?trackingId=dummyTrackingId',
  );
});

test('reading the route has side-effect', async () => {
  const registry = new NavigationRegistry(panelMock, rpcExtensionMock);
  await registry.navigateToRecipeStart('dummyRecipeId', 'dummyTrackingId');

  // readRoute() consumes the pending route: first read returns it, second read is empty
  expect(registry.readRoute()).toBeDefined();
  expect(registry.readRoute()).toBeUndefined();
});

================================================
FILE: packages/backend/src/registries/NavigationRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { type Disposable, navigation, type WebviewPanel, commands } from '@podman-desktop/api'; import { MSG_NAVIGATION_ROUTE_UPDATE } from '@shared/Messages'; import type { RpcExtension } from '@shared/messages/MessageProxy'; export const RECIPE_START_ROUTE = 'recipe.start'; export const RECIPE_START_NAVIGATE_COMMAND = 'ai-lab.navigation.recipe.start'; export const INFERENCE_CREATE_ROUTE = 'inference.create'; export const INFERENCE_CREATE_NAVIGATE_COMMAND = 'ai-lab.navigation.inference.create'; export class NavigationRegistry implements Disposable { #disposables: Disposable[] = []; #route: string | undefined = undefined; constructor( private panel: WebviewPanel, private rpcExtension: RpcExtension, ) {} init(): void { if (!navigation.register) { console.warn('this version of podman-desktop do not support task actions: some feature will not be available.'); return; } // register the recipes start navigation and command this.#disposables.push( commands.registerCommand(RECIPE_START_NAVIGATE_COMMAND, this.navigateToRecipeStart.bind(this)), ); this.#disposables.push(navigation.register(RECIPE_START_ROUTE, RECIPE_START_NAVIGATE_COMMAND)); // register the inference create navigation and command this.#disposables.push( commands.registerCommand(INFERENCE_CREATE_NAVIGATE_COMMAND, this.navigateToInferenceCreate.bind(this)), ); this.#disposables.push(navigation.register(INFERENCE_CREATE_ROUTE, INFERENCE_CREATE_NAVIGATE_COMMAND)); } /** * This function return the 
route, and reset it. * Meaning after read the route is undefined */ public readRoute(): string | undefined { const result: string | undefined = this.#route; this.#route = undefined; return result; } dispose(): void { this.#disposables.forEach(disposable => disposable.dispose()); } protected async updateRoute(route: string): Promise { await this.rpcExtension.fire(MSG_NAVIGATION_ROUTE_UPDATE, route); this.#route = route; this.panel.reveal(); } public async navigateToRecipeStart(recipeId: string, trackingId: string): Promise { return this.updateRoute(`/recipe/${recipeId}/start?trackingId=${trackingId}`); } public async navigateToInferenceCreate(trackingId: string): Promise { return this.updateRoute(`/service/create?trackingId=${trackingId}`); } } ================================================ FILE: packages/backend/src/registries/TaskRegistry.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { TaskRegistry } from './TaskRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';

// Rpc channel stub: TaskRegistry only calls fire() to broadcast task updates.
const rpcExtension = {
  fire: vi.fn(),
} as unknown as RpcExtension;

beforeEach(() => {
  vi.resetAllMocks();
  vi.mocked(rpcExtension.fire).mockResolvedValue(true);
});

test('should not have any tasks by default', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  expect(taskRegistry.getTasks().length).toBe(0);
});

test('dispose should cleanup all tasks', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  taskRegistry.createTask('random', 'loading');
  expect(taskRegistry.getTasks()).toHaveLength(1);

  taskRegistry.dispose();
  expect(taskRegistry.getTasks()).toHaveLength(0);
});

test('should notify when create task', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  taskRegistry.createTask('random', 'loading');
  expect(rpcExtension.fire).toHaveBeenCalled();
});

test('should notify when update task', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  const task = taskRegistry.createTask('random', 'loading');
  taskRegistry.updateTask(task);
  // One notification for the creation, one for the update.
  expect(rpcExtension.fire).toHaveBeenCalledTimes(2);
});

test('should get tasks by label', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  taskRegistry.createTask('random-1', 'loading', { index: '1' });
  taskRegistry.createTask('random-2', 'loading', { index: '2' });

  const tasksWithIndex1 = taskRegistry.getTasksByLabels({ index: '1' });
  const tasksWithIndex2 = taskRegistry.getTasksByLabels({ index: '2' });

  expect(tasksWithIndex1.length).toBe(1);
  expect(tasksWithIndex2.length).toBe(1);
  expect(tasksWithIndex1[0].name).toBe('random-1');
  expect(tasksWithIndex2[0].name).toBe('random-2');
});

test('should delete tasks by label', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  taskRegistry.createTask('random-1', 'loading', { index: '1' });
  taskRegistry.createTask('random-2', 'loading', { index: '2' });

  taskRegistry.deleteByLabels({ index: '1' });
  expect(taskRegistry.getTasks().length).toBe(1);
  expect(taskRegistry.getTasks()[0].name).toBe('random-2');
});

test('should get tasks by multiple labels', () => {
  const taskRegistry = new TaskRegistry(rpcExtension);
  taskRegistry.createTask('task-1', 'loading', { type: 'A', priority: 'high' });
  taskRegistry.createTask('task-2', 'loading', { type: 'B', priority: 'low' });
  taskRegistry.createTask('task-3', 'loading', { type: 'A', priority: 'medium' });

  const tasksWithTypeA = taskRegistry.getTasksByLabels({ type: 'A' });
  const tasksWithHighPriority = taskRegistry.getTasksByLabels({ priority: 'high' });
  const tasksWithTypeAAndHighPriority = taskRegistry.getTasksByLabels({ type: 'A', priority: 'high' });

  expect(tasksWithTypeA.length).toBe(2);
  expect(tasksWithHighPriority.length).toBe(1);
  // A task matches only when every requested label is present with the same value.
  expect(tasksWithTypeAAndHighPriority.length).toBe(1);
  expect(tasksWithTypeAAndHighPriority[0].name).toBe('task-1');
});

================================================
FILE: packages/backend/src/registries/TaskRegistry.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { type Disposable } from '@podman-desktop/api'; import type { Task, TaskState } from '@shared/models/ITask'; import { MSG_TASKS_UPDATE } from '@shared/Messages'; import type { RpcExtension } from '@shared/messages/MessageProxy'; /** * A registry for managing tasks. */ export class TaskRegistry implements Disposable { private counter: number = 0; private tasks: Map = new Map(); /** * Constructs a new TaskRegistry. * @param rpcExtension The rpc extension instance to use for communication. */ constructor(private rpcExtension: RpcExtension) {} dispose(): void { this.counter = 0; this.tasks.clear(); } /** * Retrieves a task by its ID. * @param id The ID of the task to retrieve. * @returns The task with the specified ID, or undefined if not found. */ get(id: string): Task | undefined { if (this.tasks.has(id)) return this.tasks.get(id); return undefined; } /** * Creates a new task. * @param name The name of the task. * @param state The initial state of the task. * @param labels Optional labels for the task. * @returns The newly created task. */ createTask(name: string, state: TaskState, labels: { [id: string]: string } = {}): Task { const task = { id: `task-${++this.counter}`, name: name, state: state, labels: labels, }; this.tasks.set(task.id, task); this.notify(); return task; } /** * Updates an existing task. * @param task The task to update. * @throws Error if the task with the specified ID does not exist. */ updateTask(task: Task): void { if (!this.tasks.has(task.id)) throw new Error(`Task with id ${task.id} does not exist.`); this.tasks.set(task.id, { ...task, state: task.error !== undefined ? 'error' : task.state, // enforce error state when error is defined }); this.notify(); } /** * Deletes a task by its ID. * @param taskId The ID of the task to delete. 
*/ delete(taskId: string): void { this.deleteAll([taskId]); } /** * Deletes multiple tasks by their IDs. * @param taskIds The IDs of the tasks to delete. */ deleteAll(taskIds: string[]): void { taskIds.forEach(taskId => this.tasks.delete(taskId)); this.notify(); } /** * Retrieves all tasks. * @returns An array of all tasks. */ getTasks(): Task[] { return Array.from(this.tasks.values()); } /** * Retrieves tasks that match the specified labels. * @param requestedLabels The labels to match against. * @returns An array of tasks that match the specified labels. */ getTasksByLabels(requestedLabels: { [key: string]: string }): Task[] { return this.getTasks().filter(task => this.filter(task, requestedLabels)); } /** * Return the first task matching all the labels provided * @param requestedLabels */ findTaskByLabels(requestedLabels: { [key: string]: string }): Task | undefined { return this.getTasks().find(task => this.filter(task, requestedLabels)); } private filter(task: Task, requestedLabels: { [key: string]: string }): boolean { const labels = task.labels; if (labels === undefined) return false; for (const [key, value] of Object.entries(requestedLabels)) { if (!(key in labels) || labels[key] !== value) return false; } return true; } /** * Deletes tasks that match the specified labels. * @param labels The labels to match against for deletion. */ deleteByLabels(labels: { [key: string]: string }): void { this.deleteAll(this.getTasksByLabels(labels).map(task => task.id)); } private notify(): void { this.rpcExtension.fire(MSG_TASKS_UPDATE, this.getTasks()).catch((err: unknown) => { console.error('error notifying tasks', err); }); } } ================================================ FILE: packages/backend/src/studio-api-impl.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

/* eslint-disable @typescript-eslint/no-explicit-any */

import { beforeEach, expect, test, vi, describe } from 'vitest';

import content from './tests/ai-test.json';
import type { ApplicationManager } from './managers/application/applicationManager';
import { StudioApiImpl } from './studio-api-impl';
import type { InferenceManager } from './managers/inference/inferenceManager';
import type { ContainerProviderConnection, ProviderContainerConnection, TelemetryLogger } from '@podman-desktop/api';
import { window, EventEmitter, navigation } from '@podman-desktop/api';
import { CatalogManager } from './managers/catalogManager';
import type { ModelsManager } from './managers/modelsManager';
import { timeout } from './utils/utils';
import type { TaskRegistry } from './registries/TaskRegistry';
import { LocalRepositoryRegistry } from './registries/LocalRepositoryRegistry';
import type { Recipe } from '@shared/models/IRecipe';
import type { PlaygroundV2Manager } from './managers/playgroundV2Manager';
import type { SnippetManager } from './managers/SnippetManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { CancellationTokenRegistry } from './registries/CancellationTokenRegistry';
import path from 'node:path';
import type { LocalModelImportInfo } from '@shared/models/ILocalModelInfo';
import * as podman from './utils/podman';
import type { ConfigurationRegistry } from './registries/ConfigurationRegistry';
import type { RecipeManager } from './managers/recipes/RecipeManager';
import type { PodmanConnection } from './managers/podmanConnection';
import type { NavigationRegistry } from './registries/NavigationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';

// Serve the test catalog instead of the real bundled one.
vi.mock('./ai.json', () => {
  return {
    default: content,
  };
});

vi.mock('node:fs', () => {
  return {
    existsSync: vi.fn(),
    promises: {
      readFile: vi.fn(),
    },
  };
});

// Hoisted spies shared between the '@podman-desktop/api' module mock and the tests.
const mocks = vi.hoisted(() => ({
  withProgressMock: vi.fn(),
  showWarningMessageMock: vi.fn(),
  deleteApplicationMock: vi.fn(),
  uriFileMock: vi.fn(),
  openExternalMock: vi.fn(),
}));

vi.mock('@podman-desktop/api', async () => {
  return {
    EventEmitter: vi.fn(),
    window: {
      withProgress: mocks.withProgressMock,
      showWarningMessage: mocks.showWarningMessageMock,
      showErrorMessage: vi.fn(),
      showOpenDialog: vi.fn(),
    },
    ProgressLocation: {
      TASK_WIDGET: 'TASK_WIDGET',
    },
    fs: {
      createFileSystemWatcher: (): unknown => ({
        onDidCreate: vi.fn(),
        onDidDelete: vi.fn(),
        onDidChange: vi.fn(),
      }),
    },
    Uri: {
      file: mocks.uriFileMock,
    },
    env: {
      openExternal: mocks.openExternalMock,
    },
    navigation: {
      navigateToResources: vi.fn(),
      navigateToEditProviderContainerConnection: vi.fn(),
    },
  };
});

let studioApiImpl: StudioApiImpl;
let catalogManager: CatalogManager;
let localRepositoryRegistry: LocalRepositoryRegistry;
let applicationManager: ApplicationManager;

const podmanConnectionMock: PodmanConnection = {
  findRunningContainerProviderConnection: vi.fn(),
} as unknown as PodmanConnection;

beforeEach(async () => {
  vi.resetAllMocks();

  const appUserDirectory = '.';

  // Creating CatalogManager
  catalogManager = new CatalogManager(
    {
      fire: vi.fn().mockResolvedValue(true),
    } as unknown as RpcExtension,
    appUserDirectory,
  );
  applicationManager = {
    removeApplication: mocks.deleteApplicationMock,
    requestPullApplication: vi.fn(),
  } as unknown as ApplicationManager;
  localRepositoryRegistry = new LocalRepositoryRegistry(
    {
      fire: vi.fn().mockResolvedValue(true),
    } as unknown as RpcExtension,
    appUserDirectory,
    {} as unknown as CatalogManager,
  );

  const telemetryMock = {
    logUsage: vi.fn(),
    logError: vi.fn(),
  } as unknown as TelemetryLogger;

  // Creating StudioApiImpl; collaborators not exercised here are empty stubs.
  studioApiImpl = new StudioApiImpl(
    applicationManager,
    catalogManager,
    {} as ModelsManager,
    telemetryMock,
    localRepositoryRegistry,
    {} as unknown as TaskRegistry,
    {} as unknown as InferenceManager,
    {} as unknown as PlaygroundV2Manager,
    {} as unknown as SnippetManager,
    {} as unknown as CancellationTokenRegistry,
    {} as unknown as ConfigurationRegistry,
    {} as unknown as RecipeManager,
    podmanConnectionMock,
    {} as unknown as NavigationRegistry,
  );
  vi.mock('node:fs');

  // Minimal EventEmitter replacement: fire() synchronously invokes every
  // listener registered through event().
  const listeners: ((value: unknown) => void)[] = [];

  vi.mocked(EventEmitter).mockReturnValue({
    event: vi.fn().mockImplementation(callback => {
      listeners.push(callback);
    }),
    fire: vi.fn().mockImplementation((content: unknown) => {
      listeners.forEach(listener => listener(content));
    }),
  } as unknown as EventEmitter);
});

describe.each([true, false])('with model is %o', withModel => {
  test('expect requestPullApplication to provide a tracking id', async () => {
    const connectionMock = {
      name: 'Podman machine',
    } as unknown as ContainerProviderConnection;
    vi.mocked(podmanConnectionMock.findRunningContainerProviderConnection).mockReturnValue(connectionMock);

    vi.spyOn(catalogManager, 'getRecipes').mockReturnValue([
      {
        id: 'recipe 1',
      } as unknown as Recipe,
    ]);
    vi.spyOn(catalogManager, 'getModelById').mockReturnValue({
      id: 'model 1',
    } as unknown as ModelInfo);
    vi.mocked(applicationManager.requestPullApplication).mockResolvedValue('dummy-tracker');

    const recipeId = 'recipe 1';
    let modelId: string | undefined;
    if (withModel) {
      modelId = 'model1';
    }
    const trackingId = await studioApiImpl.requestPullApplication(withModel ? { recipeId, modelId } : { recipeId });

    // The resolved recipe/model and running connection are forwarded to the manager.
    expect(applicationManager.requestPullApplication).toHaveBeenCalledWith({
      connection: connectionMock,
      recipe: expect.objectContaining({
        id: 'recipe 1',
      }),
      model: withModel
        ? expect.objectContaining({
            id: 'model 1',
          })
        : undefined,
    });
    expect(trackingId).toBe('dummy-tracker');
  });
});

test('requestRemoveApplication should ask confirmation', async () => {
  vi.spyOn(catalogManager, 'getRecipeById').mockReturnValue({
    name: 'Recipe 1',
  } as unknown as Recipe);
  mocks.showWarningMessageMock.mockResolvedValue('Confirm');
  await studioApiImpl.requestRemoveApplication('recipe-id-1', 'model-id-1');
  // The deletion happens fire-and-forget after the dialog resolves.
  await timeout(0);
  expect(mocks.deleteApplicationMock).toHaveBeenCalled();
});

test('requestDeleteLocalRepository should ask confirmation', async () => {
  mocks.showWarningMessageMock.mockResolvedValue('Confirm');
  const deleteLocalRepositoryMock = vi.spyOn(localRepositoryRegistry, 'deleteLocalRepository').mockResolvedValue();
  await studioApiImpl.requestDeleteLocalRepository('path');
  await timeout(0);
  expect(deleteLocalRepositoryMock).toHaveBeenCalled();
});

test('if requestDeleteLocalRepository fails an errorMessage should show up', async () => {
  mocks.showWarningMessageMock.mockResolvedValue('Confirm');
  const deleteLocalRepositoryMock = vi
    .spyOn(localRepositoryRegistry, 'deleteLocalRepository')
    .mockRejectedValue('error deleting');
  const errorMessageMock = vi.spyOn(window, 'showErrorMessage').mockResolvedValue('');
  await studioApiImpl.requestDeleteLocalRepository('path');
  await timeout(0);
  expect(deleteLocalRepositoryMock).toHaveBeenCalled();
  expect(errorMessageMock).toBeCalledWith('Error deleting local path "path". Error: error deleting');
});

describe.each([{ os: 'windows' }, { os: 'linux' }, { os: 'macos' }])('verify openVSCode', ({ os }) => {
  test(`check openVSCode generates the correct URL on ${os}`, async () => {
    vi.mock('node:path');
    vi.spyOn(path, 'isAbsolute').mockReturnValue(true);
    vi.spyOn(path, 'normalize').mockImplementation((path: string) => {
      return path;
    });
    const folder = os === 'windows' ? 'C:\\\\Users\\\\podman-desktop\\\\work' : '/home/podman-desktop/work';
    // Uri.file stub: keeps the path and merges with() overrides into a plain object.
    mocks.uriFileMock.mockImplementation((path: string) => {
      return {
        path: path,
        with: (change?: {
          scheme?: string;
          authority?: string;
          path?: string;
          query?: string;
          fragment?: string;
        }): unknown => {
          return {
            path: path,
            ...change,
          };
        },
      };
    });
    mocks.openExternalMock.mockResolvedValue(true);
    await studioApiImpl.openVSCode(folder);
    // The generated URI must use the vscode:// scheme, a 'file' authority and an absolute path.
    expect(mocks.openExternalMock).toHaveBeenCalledWith(
      expect.objectContaining({ path: expect.stringMatching(/^\//), authority: 'file', scheme: 'vscode' }),
    );
  });
});

test('openDialog should call podmanDesktopAPi showOpenDialog', async () => {
  const openDialogMock = vi.spyOn(window, 'showOpenDialog');
  await studioApiImpl.openDialog({
    title: 'title',
  });

  expect(openDialogMock).toBeCalledWith({
    title: 'title',
  });
});

test('importModels should call catalogManager', async () => {
  const addLocalModelsMock = vi
    .spyOn(catalogManager, 'importUserModels')
    .mockImplementation((_models: LocalModelImportInfo[]) => Promise.resolve());
  const models: LocalModelImportInfo[] = [
    {
      name: 'name',
      path: 'path',
    },
    {
      name: 'name1',
      path: 'path1',
    },
  ];

  await studioApiImpl.importModels(models);
  expect(addLocalModelsMock).toBeCalledWith(models);
});

describe('validateLocalModel', () => {
  test('Expect validateLocalModel to complete as path is valid', async () => {
    vi.mock('node:path');
    vi.spyOn(path, 'resolve').mockImplementation((path: string) => {
      return path;
    });
    vi.spyOn(path, 'join').mockImplementation((path1: string, path2: string) => `${path1}/${path2}`);
    // Known model lives under a different directory ('path1'), so no clash.
    vi.spyOn(studioApiImpl, 'getModelsInfo').mockResolvedValue([
      {
        id: 'model',
        file: {
          path: 'path1',
          file: 'file.gguf',
        },
      } as unknown as ModelInfo,
    ]);
    await studioApiImpl.validateLocalModel({
      path: 'path',
      name: 'file.gguf',
    });
  });

  test('Expect validateLocalModel to raise an error as path is valid', async () => {
    vi.mock('node:path');
    vi.spyOn(path, 'resolve').mockImplementation((path: string) => {
      return path;
    });
    vi.spyOn(path, 'dirname').mockReturnValue('path');
    vi.spyOn(path, 'basename').mockReturnValue('file.gguf');
    vi.spyOn(path, 'join').mockImplementation((path1: string, path2: string) => `${path1}/${path2}`);
    // Known model resolves to the exact same path/file, which must be rejected.
    vi.spyOn(studioApiImpl, 'getModelsInfo').mockResolvedValue([
      {
        id: 'model',
        file: {
          path: 'path',
          file: 'file.gguf',
        },
      } as unknown as ModelInfo,
    ]);
    await expect(
      studioApiImpl.validateLocalModel({
        path: 'path/file.gguf',
        name: 'file',
      }),
    ).rejects.toThrowError('file already imported');
  });
});

test('navigateToResources should call navigation.navigateToResources', async () => {
  const navigationSpy = vi.spyOn(navigation, 'navigateToResources');

  await studioApiImpl.navigateToResources();
  await timeout(0);

  expect(navigationSpy).toHaveBeenCalled();
});

test('navigateToEditConnectionProvider should call navigation.navigateToEditProviderContainerConnection', async () => {
  const connection: ProviderContainerConnection = {
    providerId: 'id',
    connection: {
      endpoint: {
        socketPath: '/path',
      },
      name: 'name',
      type: 'podman',
      status: vi.fn(),
    },
  };
  vi.spyOn(podman, 'getPodmanConnection').mockReturnValue(connection);
  const navigationSpy = vi.spyOn(navigation, 'navigateToEditProviderContainerConnection');

  await studioApiImpl.navigateToEditConnectionProvider('connection');
  await timeout(0);

  expect(navigationSpy).toHaveBeenCalledWith(connection);
});

================================================
FILE: packages/backend/src/studio-api-impl.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red
Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { StudioAPI } from '@shared/StudioAPI'; import type { ApplicationManager } from './managers/application/applicationManager'; import type { ModelInfo } from '@shared/models/IModelInfo'; import * as podmanDesktopApi from '@podman-desktop/api'; import type { CatalogManager } from './managers/catalogManager'; import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog'; import type { ModelsManager } from './managers/modelsManager'; import type { ApplicationState } from '@shared/models/IApplicationState'; import type { Task } from '@shared/models/ITask'; import type { TaskRegistry } from './registries/TaskRegistry'; import type { LocalRepository } from '@shared/models/ILocalRepository'; import type { LocalRepositoryRegistry } from './registries/LocalRepositoryRegistry'; import path from 'node:path'; import type { InferenceServer, InferenceType } from '@shared/models/IInference'; import type { CreationInferenceServerOptions } from '@shared/models/InferenceServerConfig'; import type { InferenceManager } from './managers/inference/inferenceManager'; import type { Conversation } from '@shared/models/IPlaygroundMessage'; import type { PlaygroundV2Manager } from './managers/playgroundV2Manager'; import { getFreeRandomPort } from './utils/ports'; import { withDefaultConfiguration } from 
'./utils/inferenceUtils'; import type { RequestOptions } from '@shared/models/RequestOptions'; import type { SnippetManager } from './managers/SnippetManager'; import type { Language } from 'postman-code-generators'; import type { ModelOptions } from '@shared/models/IModelOptions'; import type { CancellationTokenRegistry } from './registries/CancellationTokenRegistry'; import type { LocalModelImportInfo } from '@shared/models/ILocalModelInfo'; import { getPodmanConnection } from './utils/podman'; import type { CheckContainerConnectionResourcesOptions, ContainerConnectionInfo, ContainerProviderConnectionInfo, } from '@shared/models/IContainerConnectionInfo'; import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration'; import type { ConfigurationRegistry } from './registries/ConfigurationRegistry'; import type { RecipeManager } from './managers/recipes/RecipeManager'; import type { PodmanConnection } from './managers/podmanConnection'; import { isRecipePullOptionsWithModelInference, type RecipePullOptions } from '@shared/models/IRecipe'; import type { ContainerProviderConnection } from '@podman-desktop/api'; import type { NavigationRegistry } from './registries/NavigationRegistry'; import type { FilterRecipesResult, RecipeFilters } from '@shared/models/FilterRecipesResult'; import type { ApplicationOptions } from './models/ApplicationOptions'; interface PortQuickPickItem extends podmanDesktopApi.QuickPickItem { port: number; } export class StudioApiImpl implements StudioAPI { constructor( private applicationManager: ApplicationManager, private catalogManager: CatalogManager, private modelsManager: ModelsManager, private telemetry: podmanDesktopApi.TelemetryLogger, private localRepositories: LocalRepositoryRegistry, private taskRegistry: TaskRegistry, private inferenceManager: InferenceManager, private playgroundV2: PlaygroundV2Manager, private snippetManager: SnippetManager, private cancellationTokenRegistry: CancellationTokenRegistry, 
private configurationRegistry: ConfigurationRegistry, private recipeManager: RecipeManager, private podmanConnection: PodmanConnection, private navigationRegistry: NavigationRegistry, ) {} async readRoute(): Promise { return this.navigationRegistry.readRoute(); } async requestDeleteConversation(conversationId: string): Promise { // Do not wait on the promise as the api would probably timeout before the user answer. podmanDesktopApi.window .showWarningMessage(`Are you sure you want to delete this playground ?`, 'Confirm', 'Cancel') .then((result: string | undefined) => { if (result === 'Confirm') { this.playgroundV2.deleteConversation(conversationId); } }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } async requestCreatePlayground(name: string, model: ModelInfo): Promise { try { return await this.playgroundV2.requestCreatePlayground(name, model); } catch (err: unknown) { console.error('Something went wrong while trying to create playground environment', err); throw err; } } submitPlaygroundMessage(containerId: string, userInput: string, options?: ModelOptions): Promise { return this.playgroundV2.submit(containerId, userInput, options); } async setPlaygroundSystemPrompt(conversationId: string, content: string | undefined): Promise { this.playgroundV2.setSystemPrompt(conversationId, content); } async getPlaygroundConversations(): Promise { return this.playgroundV2.getConversations(); } async getExtensionConfiguration(): Promise { return this.configurationRegistry.getExtensionConfiguration(); } async getPodmanDesktopVersion(): Promise { return this.configurationRegistry.getPodmanDesktopVersion(); } async updateExtensionConfiguration(update: Partial): Promise { return this.configurationRegistry.updateExtensionConfiguration(update); } async getSnippetLanguages(): Promise { return this.snippetManager.getLanguageList(); } createSnippet(options: RequestOptions, language: string, variant: string): Promise { return 
this.snippetManager.generate(options, language, variant); } async getInferenceServers(): Promise { return this.inferenceManager.getServers(); } async getRegisteredProviders(): Promise { return this.inferenceManager.getRegisteredProviders(); } async requestDeleteInferenceServer(...containerIds: string[]): Promise { // Do not wait on the promise as the api would probably timeout before the user answer. if (containerIds.length === 0) throw new Error('At least one container id should be provided.'); let dialogMessage: string; if (containerIds.length === 1) { dialogMessage = `Are you sure you want to delete this service ?`; } else { dialogMessage = `Are you sure you want to delete those ${containerIds.length} services ?`; } podmanDesktopApi.window .showWarningMessage(dialogMessage, 'Confirm', 'Cancel') .then((result: string | undefined) => { if (result !== 'Confirm') return; Promise.all(containerIds.map(containerId => this.inferenceManager.deleteInferenceServer(containerId))).catch( (err: unknown) => { console.error('Something went wrong while trying to delete the inference server', err); }, ); }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } async requestCreateInferenceServer(options: CreationInferenceServerOptions): Promise { try { const config = await withDefaultConfiguration(options); return this.inferenceManager.requestCreateInferenceServer(config); } catch (err: unknown) { console.error('Something went wrong while trying to start inference server', err); throw err; } } startInferenceServer(containerId: string): Promise { return this.inferenceManager.startInferenceServer(containerId); } stopInferenceServer(containerId: string): Promise { return this.inferenceManager.stopInferenceServer(containerId); } async ping(): Promise { return 'pong'; } async openURL(url: string): Promise { return await podmanDesktopApi.env.openExternal(podmanDesktopApi.Uri.parse(url)); } async openFile(file: string, recipeId?: string): 
Promise {
    const telemetry: Record = {
      'recipe.id': recipeId,
    };
    try {
      return await podmanDesktopApi.env.openExternal(podmanDesktopApi.Uri.file(file));
    } catch (err) {
      telemetry['errorMessage'] = String(err);
      throw err;
    } finally {
      // Always record the open-file attempt, successful or not.
      this.telemetry.logUsage('studio.open-file', telemetry);
    }
  }
  async openDialog(options?: podmanDesktopApi.OpenDialogOptions): Promise {
    return await podmanDesktopApi.window.showOpenDialog(options);
  }
  // Clone the recipe identified by recipeId; throws when the id is unknown to the catalog.
  async cloneApplication(recipeId: string): Promise {
    const recipe = this.catalogManager.getRecipes().find(recipe => recipe.id === recipeId);
    if (!recipe) throw new Error(`recipe with id ${recipeId} not found`);
    return this.recipeManager.cloneRecipe(recipe);
  }
  async getContainerProviderConnection(): Promise {
    return this.podmanConnection.getContainerProviderConnectionInfo();
  }
  // Resolve the recipe, a container provider connection and (optionally) a model,
  // then delegate the pull to the application manager.
  async requestPullApplication(options: RecipePullOptions): Promise {
    const recipe = this.catalogManager.getRecipes().find(recipe => recipe.id === options.recipeId);
    if (!recipe) throw new Error(`recipe with id ${options.recipeId} not found`);
    // Use the explicitly requested connection when provided, otherwise fall back
    // to any running container provider connection.
    let connection: ContainerProviderConnection | undefined = undefined;
    if (options.connection) {
      connection = this.podmanConnection.getContainerProviderConnection(options.connection);
    } else {
      connection = this.podmanConnection.findRunningContainerProviderConnection();
    }
    if (!connection) throw new Error('no running container provider connection found.');
    let model: ModelInfo | undefined;
    let opts: ApplicationOptions;
    if (isRecipePullOptionsWithModelInference(options)) {
      model = this.catalogManager.getModelById(options.modelId);
      opts = {
        connection,
        recipe,
        dependencies: options.dependencies,
        model,
      };
    } else {
      opts = {
        connection,
        recipe,
        dependencies: options.dependencies,
      };
    }
    return this.applicationManager.requestPullApplication(opts);
  }
  async getModelsInfo(): Promise {
    return this.modelsManager.getModelsInfo();
  }
  getModelMetadata(modelId: string): Promise> {
    return this.modelsManager.getModelMetadata(modelId);
  }
  async getCatalog():
Promise { return this.catalogManager.getCatalog(); } async filterRecipes(filters: RecipeFilters): Promise { return this.catalogManager.filterRecipes(filters); } async requestRemoveLocalModel(modelId: string): Promise { const modelInfo = this.modelsManager.getLocalModelInfo(modelId); // Do not wait on the promise as the api would probably timeout before the user answer. podmanDesktopApi.window .showWarningMessage( `Are you sure you want to delete ${modelId} ? The following files will be removed from disk "${modelInfo.file}".`, 'Confirm', 'Cancel', ) .then((result: string | undefined) => { if (result === 'Confirm') { this.modelsManager.deleteModel(modelId).catch((err: unknown) => { console.error('Something went wrong while deleting the models', err); // Lets reloads the models (could fix the issue) this.modelsManager.loadLocalModels().catch((err: unknown) => { console.error('Cannot reload the models', err); }); }); } }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } navigateToContainer(containerId: string): Promise { return podmanDesktopApi.navigation.navigateToContainer(containerId); } async navigateToPod(podId: string): Promise { const pods = await podmanDesktopApi.containerEngine.listPods(); const pod = pods.find(pod => pod.Id === podId); if (pod === undefined) throw new Error(`Pod with id ${podId} not found.`); return podmanDesktopApi.navigation.navigateToPod(pod.kind, pod.Name, pod.engineId); } async navigateToResources(): Promise { // navigateToResources is only available from desktop 1.10 if (podmanDesktopApi.navigation.navigateToResources) { return podmanDesktopApi.navigation.navigateToResources(); } } async navigateToEditConnectionProvider(connectionName: string): Promise { // navigateToEditProviderContainerConnection is only available from desktop 1.10 if (podmanDesktopApi.navigation.navigateToEditProviderContainerConnection) { const connection = getPodmanConnection(connectionName); return 
podmanDesktopApi.navigation.navigateToEditProviderContainerConnection(connection); } } async getApplicationsState(): Promise { return this.applicationManager.getApplicationsState(); } async requestStartApplication(recipeId: string, modelId: string): Promise { this.applicationManager.startApplication(recipeId, modelId).catch((err: unknown) => { console.error('Something went wrong while trying to start application', err); }); } async requestStopApplication(recipeId: string, modelId: string): Promise { this.applicationManager.stopApplication(recipeId, modelId).catch((err: unknown) => { console.error('Something went wrong while trying to stop application', err); }); } async requestRemoveApplication(recipeId: string, modelId: string): Promise { const recipe = this.catalogManager.getRecipeById(recipeId); // Do not wait on the promise as the api would probably timeout before the user answer. podmanDesktopApi.window .showWarningMessage( `Delete the AI App "${recipe.name}"? This will delete the containers running the application and model.`, 'Confirm', 'Cancel', ) .then((result: string | undefined) => { if (result === 'Confirm') { this.applicationManager.removeApplication(recipeId, modelId).catch((err: unknown) => { console.error(`error deleting AI App's pod: ${String(err)}`); podmanDesktopApi.window .showErrorMessage( `Error deleting the AI App "${recipe.name}". 
You can try to stop and delete the AI App's pod manually.`, ) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); }); } }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } async requestRestartApplication(recipeId: string, modelId: string): Promise { const recipe = this.catalogManager.getRecipeById(recipeId); // get the state of the application const state = this.applicationManager .getApplicationsState() .find(state => state.recipeId === recipeId && state.modelId === modelId); if (!state) throw new Error('application is not running.'); // get the corresponding connection const connection = await this.podmanConnection.getConnectionByEngineId(state.pod.engineId); // Do not wait on the promise as the api would probably timeout before the user answer. podmanDesktopApi.window .showWarningMessage( `Restart the AI App "${recipe.name}"? This will delete the containers running the application and model, rebuild the images with the current sources, and restart the containers.`, 'Confirm', 'Cancel', ) .then((result: string | undefined) => { if (result === 'Confirm') { this.applicationManager.restartApplication(connection, recipeId, modelId).catch((err: unknown) => { console.error(`error restarting AI App: ${String(err)}`); podmanDesktopApi.window .showErrorMessage(`Error restarting the AI App "${recipe.name}"`) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); }); } }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } async requestOpenApplication(recipeId: string, modelId: string): Promise { const recipe = this.catalogManager.getRecipeById(recipeId); this.applicationManager .getApplicationPorts(recipeId, modelId) .then((ports: number[]) => { if (ports.length === 0) { podmanDesktopApi.window .showErrorMessage(`AI App ${recipe.name} has no application ports to open`) 
.catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } else if (ports.length === 1) { const uri = `http://localhost:${ports[0]}`; podmanDesktopApi.env.openExternal(podmanDesktopApi.Uri.parse(uri)).catch((err: unknown) => { console.error(`Something went wrong while opening ${uri}`, err); }); } else { podmanDesktopApi.window .showQuickPick( ports.map(p => { const item: PortQuickPickItem = { port: p, label: `${p}`, description: `Port ${p}` }; return item; }), { placeHolder: 'Select the port to open' }, ) .then((selectedPort: PortQuickPickItem | undefined) => { if (!selectedPort) return; const uri = `http://localhost:${selectedPort.port}`; podmanDesktopApi.env.openExternal(podmanDesktopApi.Uri.parse(uri)).catch((err: unknown) => { console.error(`Something went wrong while opening ${uri}`, err); }); }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } }) .catch((err: unknown) => { console.error(`error opening AI App: ${String(err)}`); podmanDesktopApi.window.showErrorMessage(`Error opening the AI App "${recipe.name}"`).catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); }); } async telemetryLogUsage(eventName: string, data?: Record): Promise { this.telemetry.logUsage(eventName, data); } async telemetryLogError(eventName: string, data?: Record): Promise { this.telemetry.logError(eventName, data); } async getLocalRepositories(): Promise { return this.localRepositories.getLocalRepositories(); } async getTasks(): Promise { return this.taskRegistry.getTasks(); } async openVSCode(directory: string, recipeId?: string): Promise { const telemetry: Record = { 'recipe.id': recipeId, }; try { if (!path.isAbsolute(directory)) { throw new Error('Do not support relative directory.'); } let unixPath: string = path.normalize(directory).replace(/[\\/]+/g, '/'); if (!unixPath.startsWith('/')) { unixPath = `/${unixPath}`; } await 
podmanDesktopApi.env.openExternal( podmanDesktopApi.Uri.file(unixPath).with({ scheme: 'vscode', authority: 'file' }), ); } catch (err) { telemetry['errorMessage'] = String(err); console.error('Something went wrong while trying to open VSCode', err); throw err; } finally { this.telemetry.logUsage('studio.open-vscode', telemetry); } } async downloadModel(modelId: string): Promise { const modelInfo: ModelInfo = this.modelsManager.getModelInfo(modelId); // Do not wait for the download task as it is too long. this.modelsManager.requestDownloadModel(modelInfo).catch((err: unknown) => { console.error(`Something went wrong while trying to download the model ${modelId}`, err); }); } getHostFreePort(): Promise { return getFreeRandomPort('0.0.0.0'); } async requestDeleteLocalRepository(path: string): Promise { // Do not wait on the promise as the api would probably timeout before the user answer. podmanDesktopApi.window .showWarningMessage(`Delete permanently "${path}"?`, 'Confirm', 'Cancel') .then((result: string | undefined) => { if (result === 'Confirm') { this.localRepositories.deleteLocalRepository(path).catch((err: unknown) => { console.error(`error deleting path: ${String(err)}`); podmanDesktopApi.window .showErrorMessage(`Error deleting local path "${path}". 
Error: ${String(err)}`) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); }); } }) .catch((err: unknown) => { console.error(`Something went wrong with confirmation modals`, err); }); } async requestCancelToken(tokenId: number): Promise { if (!this.cancellationTokenRegistry.hasCancellationTokenSource(tokenId)) throw new Error(`Cancellation token with id ${tokenId} does not exist.`); this.cancellationTokenRegistry.getCancellationTokenSource(tokenId)?.cancel(); } async importModels(models: LocalModelImportInfo[]): Promise { return this.catalogManager.importUserModels(models); } async validateLocalModel(model: LocalModelImportInfo): Promise { const catalogModels = await this.getModelsInfo(); for (const catalogModel of catalogModels) { if (!catalogModel.file) { continue; } if (catalogModel.file.path === path.dirname(model.path) && catalogModel.file.file === path.basename(model.path)) { throw new Error('file already imported'); } } } copyToClipboard(content: string): Promise { return podmanDesktopApi.env.clipboard.writeText(content); } async checkContainerConnectionStatusAndResources( options: CheckContainerConnectionResourcesOptions, ): Promise { return this.podmanConnection.checkContainerConnectionStatusAndResources(options); } } ================================================ FILE: packages/backend/src/studio.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ /* eslint-disable @typescript-eslint/no-explicit-any */ import { afterEach, beforeEach, expect, test, vi, type MockInstance } from 'vitest'; import { Studio } from './studio'; import { type ExtensionContext, EventEmitter } from '@podman-desktop/api'; import { CatalogManager } from './managers/catalogManager'; import * as fs from 'node:fs'; vi.mock('./managers/modelsManager'); vi.mock('./managers/catalogManager'); const mockedExtensionContext = { subscriptions: [], storagePath: 'dummy-storage-path', } as unknown as ExtensionContext; const studio = new Studio(mockedExtensionContext); const mocks = vi.hoisted(() => ({ listContainers: vi.fn(), getContainerConnections: vi.fn(), postMessage: vi.fn(), logErrorMock: vi.fn(), consoleWarnMock: vi.fn(), consoleLogMock: vi.fn(), })); vi.mock('@podman-desktop/api', async () => { return { configuration: { getConfiguration: (): unknown => ({ get: vi.fn(), }), onDidChangeConfiguration: vi.fn(), }, fs: { createFileSystemWatcher: (): unknown => ({ onDidCreate: vi.fn(), onDidDelete: vi.fn(), onDidChange: vi.fn(), }), }, EventEmitter: vi.fn(), Uri: class { static readonly joinPath = (): unknown => ({ fsPath: '.' 
}); }, window: { createWebviewPanel: (): unknown => ({ webview: { html: '', onDidReceiveMessage: vi.fn(), postMessage: mocks.postMessage, }, onDidChangeViewState: vi.fn(), }), createStatusBarItem: (): unknown => ({ show: vi.fn(), }), }, env: { createTelemetryLogger: (): unknown => ({ logUsage: vi.fn(), logError: mocks.logErrorMock, }), }, containerEngine: { onEvent: vi.fn(), listContainers: mocks.listContainers, }, navigation: { register: vi.fn(), }, provider: { onDidRegisterContainerConnection: vi.fn(), onDidUpdateContainerConnection: vi.fn(), onDidUnregisterContainerConnection: vi.fn(), onDidUpdateProvider: vi.fn(), getContainerConnections: mocks.getContainerConnections, }, commands: { registerCommand: vi.fn(), }, Disposable: { create: vi.fn(), }, }; }); /// mock console.log const originalConsoleLog = console.log; beforeEach(() => { vi.clearAllMocks(); console.log = mocks.consoleLogMock; console.warn = mocks.consoleWarnMock; vi.mocked(EventEmitter).mockReturnValue({ event: vi.fn(), fire: vi.fn(), } as unknown as EventEmitter); mocks.postMessage.mockResolvedValue(undefined); vi.mocked(CatalogManager).mockReturnValue({ onUpdate: vi.fn(), init: vi.fn(), getRecipes: vi.fn().mockReturnValue([]), } as unknown as CatalogManager); }); afterEach(() => { console.log = originalConsoleLog; }); test('check activate', async () => { mocks.listContainers.mockReturnValue([]); mocks.getContainerConnections.mockReturnValue([]); (vi.spyOn(fs.promises, 'readFile') as unknown as MockInstance<() => Promise>).mockImplementation(() => { return Promise.resolve(''); }); await studio.activate(); // expect the activate method to be called on the studio class expect(mocks.consoleLogMock).toBeCalledWith('starting AI Lab extension'); }); test('check deactivate ', async () => { await studio.deactivate(); // expect the deactivate method to be called on the studio class expect(mocks.consoleLogMock).toBeCalledWith('stopping AI Lab extension'); }); ================================================ 
FILE: packages/backend/src/studio.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { env } from '@podman-desktop/api'; import type { ExtensionContext, TelemetryLogger, WebviewPanel, WebviewPanelOnDidChangeViewStateEvent, } from '@podman-desktop/api'; import { RpcExtension } from '@shared/messages/MessageProxy'; import { StudioApiImpl } from './studio-api-impl'; import { ApplicationManager } from './managers/application/applicationManager'; import { GitManager } from './managers/gitManager'; import { TaskRegistry } from './registries/TaskRegistry'; import { CatalogManager } from './managers/catalogManager'; import { ModelsManager } from './managers/modelsManager'; import { ContainerRegistry } from './registries/ContainerRegistry'; import { PodmanConnection } from './managers/podmanConnection'; import { LocalRepositoryRegistry } from './registries/LocalRepositoryRegistry'; import { InferenceManager } from './managers/inference/inferenceManager'; import { PlaygroundV2Manager } from './managers/playgroundV2Manager'; import { SnippetManager } from './managers/SnippetManager'; import { CancellationTokenRegistry } from './registries/CancellationTokenRegistry'; import { BuilderManager } from 
'./managers/recipes/BuilderManager'; import { PodManager } from './managers/recipes/PodManager'; import { initWebview } from './webviewUtils'; import { LlamaCppPython } from './workers/provider/LlamaCppPython'; import { InferenceProviderRegistry } from './registries/InferenceProviderRegistry'; import { ConfigurationRegistry } from './registries/ConfigurationRegistry'; import { RecipeManager } from './managers/recipes/RecipeManager'; import { GPUManager } from './managers/GPUManager'; import { WhisperCpp } from './workers/provider/WhisperCpp'; import { ApiServer } from './managers/apiServer'; import { InstructlabManager } from './managers/instructlab/instructlabManager'; import { InstructlabApiImpl } from './instructlab-api-impl'; import { NavigationRegistry } from './registries/NavigationRegistry'; import type { StudioAPI } from '@shared/StudioAPI'; import { STUDIO_API_CHANNEL } from '@shared/StudioAPI'; import type { InstructlabAPI } from '@shared/InstructlabAPI'; import { INSTRUCTLAB_API_CHANNEL } from '@shared/InstructlabAPI'; import { ModelHandlerRegistry } from './registries/ModelHandlerRegistry'; import { URLModelHandler } from './models/URLModelHandler'; import { HuggingFaceModelHandler } from './models/HuggingFaceModelHandler'; import { LlamaStackApiImpl } from './llama-stack-api-impl'; import { LLAMA_STACK_API_CHANNEL, type LlamaStackAPI } from '@shared/LlamaStackAPI'; import { LlamaStackManager } from './managers/llama-stack/llamaStackManager'; import { OpenVINO } from './workers/provider/OpenVINO'; import { McpServerManager } from './managers/playground/McpServerManager'; import os from 'node:os'; export class Studio { readonly #extensionContext: ExtensionContext; /** * Webview panel used by AI Lab */ #panel: WebviewPanel | undefined; /** * API related classes */ #rpcExtension: RpcExtension | undefined; #studioApi: StudioApiImpl | undefined; #instructlabApi: InstructlabApiImpl | undefined; #llamaStackApi: LlamaStackApiImpl | undefined; 
#localRepositoryRegistry: LocalRepositoryRegistry | undefined; #catalogManager: CatalogManager | undefined; #modelsManager: ModelsManager | undefined; #telemetry: TelemetryLogger | undefined; #inferenceManager: InferenceManager | undefined; #podManager: PodManager | undefined; #builderManager: BuilderManager | undefined; #containerRegistry: ContainerRegistry | undefined; #podmanConnection: PodmanConnection | undefined; #taskRegistry: TaskRegistry | undefined; #cancellationTokenRegistry: CancellationTokenRegistry | undefined; #snippetManager: SnippetManager | undefined; #mcpServerManager: McpServerManager | undefined; #playgroundManager: PlaygroundV2Manager | undefined; #applicationManager: ApplicationManager | undefined; #recipeManager: RecipeManager | undefined; #inferenceProviderRegistry: InferenceProviderRegistry | undefined; #configurationRegistry: ConfigurationRegistry | undefined; #gpuManager: GPUManager | undefined; #navigationRegistry: NavigationRegistry | undefined; #instructlabManager: InstructlabManager | undefined; #llamaStackManager: LlamaStackManager | undefined; constructor(readonly extensionContext: ExtensionContext) { this.#extensionContext = extensionContext; } public async activate(): Promise { console.log('starting AI Lab extension'); this.#telemetry = env.createTelemetryLogger(); /** * Storage directory for the extension provided by podman desktop */ const appUserDirectory = this.extensionContext.storagePath; this.#telemetry.logUsage('start'); /** * The AI Lab has a webview integrated in Podman Desktop * We need to initialize and configure it properly */ this.#panel = await initWebview(this.#extensionContext.extensionUri); this.#extensionContext.subscriptions.push(this.#panel); this.#panel.onDidChangeViewState((e: WebviewPanelOnDidChangeViewStateEvent) => { this.#telemetry?.logUsage(e.webviewPanel.visible ? 
'opened' : 'closed');
    });
    /**
     * The RpcExtension handles the communication channels between the frontend and the backend
     */
    this.#rpcExtension = new RpcExtension(this.#panel.webview);
    this.#rpcExtension.init();
    this.#extensionContext.subscriptions.push(this.#rpcExtension);
    /**
     * The navigation registry is used
     * to register and manage the routes of the extension
     */
    this.#navigationRegistry = new NavigationRegistry(this.#panel, this.#rpcExtension);
    this.#navigationRegistry.init();
    this.#extensionContext.subscriptions.push(this.#navigationRegistry);
    /**
     * The cancellation token registry stores the tokens used to cancel a task
     */
    this.#cancellationTokenRegistry = new CancellationTokenRegistry();
    this.#extensionContext.subscriptions.push(this.#cancellationTokenRegistry);
    /**
     * The configuration registry manages the extension preferences/settings
     */
    this.#configurationRegistry = new ConfigurationRegistry(this.#rpcExtension, appUserDirectory);
    this.#configurationRegistry?.init();
    this.#extensionContext.subscriptions.push(this.#configurationRegistry);
    /**
     * The container registry handles the events linked to containers (start, remove, die...)
     */
    this.#containerRegistry = new ContainerRegistry();
    this.#containerRegistry.init();
    this.#extensionContext.subscriptions.push(this.#containerRegistry);
    /**
     * GitManager is used for cloning, pulling etc.
recipes repositories */ const gitManager = new GitManager(); /** * The podman connection class is responsible for podman machine events (start/stop) */ this.#podmanConnection = new PodmanConnection(this.#rpcExtension); this.#podmanConnection.init(); this.#extensionContext.subscriptions.push(this.#podmanConnection); /** * The task registry store the tasks */ this.#taskRegistry = new TaskRegistry(this.#rpcExtension); this.#extensionContext.subscriptions.push(this.#taskRegistry); /** * Create catalog manager, responsible for loading the catalog files and watching for changes */ this.#catalogManager = new CatalogManager(this.#rpcExtension, appUserDirectory); await this.#catalogManager.init(); /** * The builder manager is handling the building tasks, create corresponding tasks * through the task registry and cancellation. */ this.#builderManager = new BuilderManager(this.#taskRegistry); this.#extensionContext.subscriptions.push(this.#builderManager); /** * The pod manager is a class responsible for managing the Pods */ this.#podManager = new PodManager(); this.#podManager.init(); this.#extensionContext.subscriptions.push(this.#podManager); /** * The ModelManager role is to download and */ const modelHandlerRegistry = new ModelHandlerRegistry(this.#rpcExtension); this.#modelsManager = new ModelsManager( this.#rpcExtension, this.#catalogManager, this.#telemetry, this.#taskRegistry, this.#cancellationTokenRegistry, this.#podmanConnection, this.#configurationRegistry, modelHandlerRegistry, ); const urlModelHandler = new URLModelHandler( this.#modelsManager, this.#configurationRegistry.getExtensionConfiguration().modelsPath, ); this.#extensionContext.subscriptions.push(urlModelHandler); this.#extensionContext.subscriptions.push(modelHandlerRegistry.register(urlModelHandler)); const hfModelHandler = new HuggingFaceModelHandler(this.#modelsManager); this.#extensionContext.subscriptions.push(hfModelHandler); 
this.#extensionContext.subscriptions.push(modelHandlerRegistry.register(hfModelHandler)); await this.#modelsManager.init(); this.#extensionContext.subscriptions.push(this.#modelsManager); /** * The LocalRepositoryRegistry store and watch for recipes repository locally and expose it. */ this.#localRepositoryRegistry = new LocalRepositoryRegistry( this.#rpcExtension, appUserDirectory, this.#catalogManager, ); this.#localRepositoryRegistry.init(); this.#extensionContext.subscriptions.push(this.#localRepositoryRegistry); /** * GPUManager is a class responsible for detecting and storing the GPU specs */ this.#gpuManager = new GPUManager(this.#rpcExtension); this.#extensionContext.subscriptions.push(this.#gpuManager); /** * The Inference Provider registry stores all the InferenceProvider (aka backend) which * can be used to create InferenceServers */ this.#inferenceProviderRegistry = new InferenceProviderRegistry(this.#rpcExtension); this.#extensionContext.subscriptions.push( this.#inferenceProviderRegistry.register( new LlamaCppPython(this.#taskRegistry, this.#podmanConnection, this.#gpuManager, this.#configurationRegistry), ), ); this.#extensionContext.subscriptions.push( this.#inferenceProviderRegistry.register(new WhisperCpp(this.#taskRegistry, this.#podmanConnection)), ); if (os.arch() === 'x64') { this.#extensionContext.subscriptions.push( this.#inferenceProviderRegistry.register( new OpenVINO(this.#taskRegistry, this.#podmanConnection, this.#modelsManager, this.#configurationRegistry), ), ); } /** * The inference manager create, stop, manage Inference servers */ this.#inferenceManager = new InferenceManager( this.#rpcExtension, this.#containerRegistry, this.#podmanConnection, this.#modelsManager, this.#telemetry, this.#taskRegistry, this.#inferenceProviderRegistry, this.#catalogManager, ); this.#inferenceManager.init(); this.#extensionContext.subscriptions.push(this.#inferenceManager); /** The InstructLab tuning sessions manager */ this.#instructlabManager = new 
InstructlabManager(
      appUserDirectory,
      this.#taskRegistry,
      this.#podmanConnection,
      this.#containerRegistry,
      this.#telemetry,
    );
    this.#instructlabManager.init();
    this.#extensionContext.subscriptions.push(this.#instructlabManager);
    /** The Llama Stack manager */
    this.#llamaStackManager = new LlamaStackManager(
      appUserDirectory,
      this.#taskRegistry,
      this.#podmanConnection,
      this.#containerRegistry,
      this.#configurationRegistry,
      this.#telemetry,
      this.#modelsManager,
    );
    this.#extensionContext.subscriptions.push(this.#llamaStackManager);
    this.#llamaStackManager.init();
    /**
     * The recipe manager offers some handy methods to manage recipes: cloning, building and getting images, etc.
     */
    this.#recipeManager = new RecipeManager(
      appUserDirectory,
      gitManager,
      this.#taskRegistry,
      this.#builderManager,
      this.#localRepositoryRegistry,
      this.#inferenceManager,
    );
    this.#recipeManager.init();
    this.#extensionContext.subscriptions.push(this.#recipeManager);
    /**
     * The application manager is managing the Recipes
     */
    this.#applicationManager = new ApplicationManager(
      this.#taskRegistry,
      this.#rpcExtension,
      this.#podmanConnection,
      this.#catalogManager,
      this.#modelsManager,
      this.#telemetry,
      this.#podManager,
      this.#recipeManager,
      this.#llamaStackManager,
    );
    this.#applicationManager.init();
    this.#extensionContext.subscriptions.push(this.#applicationManager);
    this.#mcpServerManager = new McpServerManager(this.#rpcExtension, appUserDirectory);
    this.#mcpServerManager.init();
    this.#extensionContext.subscriptions.push(this.#mcpServerManager);
    /**
     * PlaygroundV2Manager handles the conversations of the Playground by using the available InferenceServer
     */
    this.#playgroundManager = new PlaygroundV2Manager(
      this.#rpcExtension,
      this.#inferenceManager,
      this.#taskRegistry,
      this.#telemetry,
      this.#cancellationTokenRegistry,
      this.#mcpServerManager,
    );
    this.#extensionContext.subscriptions.push(this.#playgroundManager);
    /**
     * The snippet manager provides the code snippets used in the
     * InferenceServer details page
     */
    this.#snippetManager = new
SnippetManager(this.#rpcExtension, this.#telemetry); this.#snippetManager.init(); /** * The StudioApiImpl is the implementation of our API between backend and frontend */ this.#studioApi = new StudioApiImpl( this.#applicationManager, this.#catalogManager, this.#modelsManager, this.#telemetry, this.#localRepositoryRegistry, this.#taskRegistry, this.#inferenceManager, this.#playgroundManager, this.#snippetManager, this.#cancellationTokenRegistry, this.#configurationRegistry, this.#recipeManager, this.#podmanConnection, this.#navigationRegistry, ); // Register the instance this.#rpcExtension.registerInstance(STUDIO_API_CHANNEL, this.#studioApi); const apiServer = new ApiServer( this.#extensionContext, this.#modelsManager, this.#catalogManager, this.#inferenceManager, this.#configurationRegistry, this.#containerRegistry, ); await apiServer.init(); this.#extensionContext.subscriptions.push(apiServer); this.#instructlabApi = new InstructlabApiImpl(this.#instructlabManager); // Register the instance this.#rpcExtension.registerInstance( INSTRUCTLAB_API_CHANNEL, this.#instructlabApi, ); this.#llamaStackApi = new LlamaStackApiImpl(this.#llamaStackManager); // Register the instance this.#rpcExtension.registerInstance(LLAMA_STACK_API_CHANNEL, this.#llamaStackApi); } public async deactivate(): Promise { console.log('stopping AI Lab extension'); this.#telemetry?.logUsage('stop'); } } ================================================ FILE: packages/backend/src/templates/java-okhttp.mustache ================================================ pom.xml ======= com.squareup.okhttp okhttp 2.7.5 AiService.java ============== package io.podman.desktop.java.okhttp; import com.squareup.okhttp.MediaType; import com.squareup.okhttp.OkHttpClient; import com.squareup.okhttp.Request; import com.squareup.okhttp.RequestBody; import com.squareup.okhttp.Response; OkHttpClient client = new OkHttpClient(); MediaType mediaType = MediaType.parse("application/json"); String json = """ { "messages": [ { 
"content": "You are a helpful assistant.", "role": "system" }, { "content": "What is the capital of France?", "role": "user" } ] } """; RequestBody body = RequestBody.create(mediaType, json); Request request = new Request.Builder() .url("{{{ endpoint }}}") .method("POST", body) .addHeader("Content-Type", "application/json") .build(); Response response = client.newCall(request).execute(); ====== ================================================ FILE: packages/backend/src/templates/python-langchain.mustache ================================================ pip ======= pip install langchain langchain-openai AiService.py ============== from langchain_openai import OpenAI from langchain.chains import LLMChain from langchain_core.prompts import ChatPromptTemplate model_service = "{{{ endpoint }}}" llm = OpenAI(base_url=model_service, api_key="sk-no-key-required", streaming=True) prompt = ChatPromptTemplate.from_messages([ ("system", "You are a helpful assistant."), ("user", "What is the capital of France?") ]) chain = LLMChain(llm=llm, prompt=prompt) response = chain.invoke({ "messages": prompt }) print(response) ====== ================================================ FILE: packages/backend/src/templates/quarkus-langchain4j.mustache ================================================ application.properties ====================== quarkus.langchain4j.openai.base-url={{{ baseUrl }}} quarkus.langchain4j.openai.api-key=sk-dummy pom.xml ======= io.quarkiverse.langchain4j quarkus-langchain4j-core {{{ version }}} io.quarkiverse.langchain4j quarkus-langchain4j-openai {{{ version }}} AiService.java ============== package io.podman.desktop.quarkus.langchain4j; import dev.langchain4j.service.UserMessage; import io.quarkiverse.langchain4j.RegisterAiService; @RegisterAiService public interface AiService { @UserMessage("{question}") String request(String question); } ====== Inject AIService into REST resource or other CDI resource and use the request method to call the LLM model. 
That's it ================================================ FILE: packages/backend/src/tests/ai-test.json ================================================ { "version": "1.0", "recipes": [ { "id": "chatbot", "description": "Chat bot application", "name": "ChatBot", "repository": "https://github.com/axel7083/locallm", "icon": "natural-language-processing", "categories": ["natural-language-processing"], "basedir": "chatbot", "readme": "# Locallm\n\nThis repo contains artifacts that can be used to build and run LLM (Large Language Model) services locally on your Mac using podman. These containerized LLM services can be used to help developers quickly prototype new LLM based applications, without the need for relying on any other externally hosted services. Since they are already containerized, it also helps developers move from their prototype to production quicker. \n\n## Current Locallm Services: \n\n* [Chatbot](#chatbot)\n* [Text Summarization](#text-summarization)\n* [Fine-tuning](#fine-tuning)\n\n### Chatbot\n\nA simple chatbot using the gradio UI. Learn how to build and run this model service here: [Chatbot](/chatbot/).\n\n### Text Summarization\n\nAn LLM app that can summarize arbitrarily long text inputs. Learn how to build and run this model service here: [Text Summarization](/summarizer/).\n\n### Fine Tuning \n\nThis application allows a user to select a model and a data set they'd like to fine-tune that model on. Once the application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services. Learn how to build and run this model training job here: [Fine-tuning](/finetune/).\n\n## Architecture\n![](https://raw.githubusercontent.com/MichaelClifford/locallm/main/assets/arch.jpg)\n\nThe diagram above indicates the general architecture for each of the individual model services contained in this repo. The core code available here is the \"LLM Task Service\" and the \"API Server\", bundled together under `model_services`. 
With an appropriately chosen model downloaded onto your host, `model_services/builds` contains the Containerfiles required to build an ARM or an x86 (with CUDA) image depending on your need. These model services are intended to be light-weight and run with smaller hardware footprints (given the Locallm name), but they can be run on any hardware that supports containers and scaled up if needed.\n\nWe also provide demo \"AI Applications\" under `ai_applications` for each model service to provide an example of how a developers could interact with the model service for their own needs. ", "recommended": ["llama-2-7b-chat.Q5_K_S", "albedobase-xl-1.3", "sdxl-turbo"] }, { "id": "recipe0", "name": "Recipe 1", "categories": [], "description": "", "repository": "", "readme": "" }, { "id": "recipe1", "name": "Recipe 1", "categories": [], "description": "", "repository": "", "readme": "", "backend": "tool1", "languages": ["lang1", "lang10"], "frameworks": ["fw1", "fw10"] }, { "id": "recipe2", "name": "Recipe 2", "categories": [], "description": "", "repository": "", "readme": "", "backend": "tool2", "languages": ["lang2", "lang10"], "frameworks": ["fw2", "fw10"] }, { "id": "recipe3", "name": "Recipe 3", "categories": [], "description": "", "repository": "", "readme": "", "backend": "tool3", "languages": ["lang3", "lang11"], "frameworks": ["fw2", "fw10", "fw11"] } ], "models": [ { "id": "llama-2-7b-chat.Q5_K_S", "name": "Llama-2-7B-Chat-GGUF", "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. 
The code, pretrained models, and fine-tuned models are all being released today 🔥", "registry": "Hugging Face", "license": "?", "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf" }, { "id": "albedobase-xl-1.3", "name": "AlbedoBase XL 1.3", "description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.", "registry": "Civital", "license": "openrail++", "url": "" }, { "id": "sdxl-turbo", "name": "SDXL Turbo", "description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.", "registry": "Hugging Face", "license": "sai-c-community", "url": "" } ], "categories": [ { "id": "natural-language-processing", "name": "Natural Language Processing", "description": "Models that work with text: classify, summarize, translate, or generate text." 
}, { "id": "computer-vision", "description": "Process images, from classification to object detection and segmentation.", "name": "Computer Vision" }, { "id": "audio", "description": "Recognize speech or classify audio with audio models.", "name": "Audio" }, { "id": "multimodal", "description": "Stuff about multimodal models goes here omg yes amazing.", "name": "Multimodal" } ] } ================================================ FILE: packages/backend/src/tests/ai-user-test.json ================================================ { "version": "1.0", "recipes": [ { "id": "recipe 1", "description" : "Recipe 1", "name" : "Recipe 1", "repository": "https://recipe1.example.com", "icon": "natural-language-processing", "categories": [ "category1" ], "basedir": "chatbot", "readme": "Readme for recipe 1", "recommended": [ "model1", "model2" ] } ], "models": [ { "id": "model1", "name": "Model 1", "description": "Readme for model 1", "registry": "Hugging Face", "license": "?", "url": "https://model1.example.com", "memory": 0 }, { "id": "model2", "name": "Model 2", "description": "Readme for model 2", "registry": "Civital", "license": "?", "url": "https://model2.example.com", "memory": 0 } ], "categories": [ { "id": "category1", "name": "Category 1", "description" : "Readme for category 1" } ] } ================================================ FILE: packages/backend/src/tests/utils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ export class TestEventEmitter { #listeners: ((value: unknown) => void)[] = []; event: (listener: (value: unknown) => void) => void; constructor() { this.event = (listener): void => { this.#listeners.push(listener); }; } fire(value: unknown): void { this.#listeners.forEach(listener => listener(value)); } } ================================================ FILE: packages/backend/src/utils/JsonWatcher.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import { promises, existsSync, mkdirSync } from 'node:fs'; import type { FileSystemWatcher } from '@podman-desktop/api'; import { EventEmitter, fs } from '@podman-desktop/api'; import { JsonWatcher } from './JsonWatcher'; vi.mock('@podman-desktop/api', () => { return { EventEmitter: vi.fn(), fs: { createFileSystemWatcher: (): unknown => ({ onDidCreate: vi.fn(), onDidDelete: vi.fn(), onDidChange: vi.fn(), }), }, }; }); vi.mock('node:fs', () => { return { existsSync: vi.fn(), mkdirSync: vi.fn(), promises: { readFile: vi.fn(), }, }; }); beforeEach(() => { vi.resetAllMocks(); // Mock event emitters const listeners: ((value: unknown) => void)[] = []; vi.mocked(EventEmitter).mockReturnValue({ event: vi.fn().mockImplementation(callback => { listeners.push(callback); }), fire: vi.fn().mockImplementation((content: unknown) => { listeners.forEach(listener => listener(content)); }), } as unknown as EventEmitter); }); test('should provide default value', async () => { vi.mocked(existsSync).mockReturnValue(false); const watcher = new JsonWatcher('dummyPath', 'dummyDefaultvalue'); const listener = vi.fn(); watcher.onContentUpdated(listener); watcher.init(); await vi.waitFor(() => { expect(listener).toHaveBeenCalledWith('dummyDefaultvalue'); }); expect(mkdirSync).toHaveBeenCalled(); expect(existsSync).toHaveBeenCalledWith('dummyPath'); expect(promises.readFile).not.toHaveBeenCalled(); }); test('should read file content', async () => { vi.mocked(existsSync).mockReturnValue(true); vi.spyOn(promises, 'readFile').mockResolvedValue('["hello"]'); const watcher = new JsonWatcher('dummyPath', []); const listener = vi.fn(); watcher.onContentUpdated(listener); watcher.init(); await vi.waitFor(() => { expect(listener).toHaveBeenCalledWith(['hello']); }); expect(promises.readFile).toHaveBeenCalledWith('dummyPath', 
'utf-8'); }); describe('file system watcher events should fire onContentUpdated', () => { let onDidCreateListener: () => void; let onDidDeleteListener: () => void; let onDidChangeListener: () => void; beforeEach(() => { vi.spyOn(fs, 'createFileSystemWatcher').mockReturnValue({ onDidCreate: vi.fn().mockImplementation(listener => (onDidCreateListener = listener)), onDidDelete: vi.fn().mockImplementation(listener => (onDidDeleteListener = listener)), onDidChange: vi.fn().mockImplementation(listener => (onDidChangeListener = listener)), } as unknown as FileSystemWatcher); }); test('onDidCreate', async () => { vi.mocked(existsSync).mockReturnValue(false); const watcher = new JsonWatcher('dummyPath', 'dummyDefaultValue'); const listener = vi.fn(); watcher.onContentUpdated(listener); watcher.init(); expect(onDidCreateListener).toBeDefined(); onDidCreateListener(); await vi.waitFor(() => { expect(listener).toHaveBeenNthCalledWith(2, 'dummyDefaultValue'); }); }); test('onDidDeleteListener', async () => { vi.mocked(existsSync).mockReturnValue(false); const watcher = new JsonWatcher('dummyPath', 'dummyDefaultValue'); const listener = vi.fn(); watcher.onContentUpdated(listener); watcher.init(); expect(onDidDeleteListener).toBeDefined(); onDidDeleteListener(); await vi.waitFor(() => { expect(listener).toHaveBeenNthCalledWith(2, 'dummyDefaultValue'); }); }); test('onDidChangeListener', async () => { vi.mocked(existsSync).mockReturnValue(false); const watcher = new JsonWatcher('dummyPath', 'dummyDefaultValue'); const listener = vi.fn(); watcher.onContentUpdated(listener); watcher.init(); expect(onDidChangeListener).toBeDefined(); onDidChangeListener(); await vi.waitFor(() => { expect(listener).toHaveBeenNthCalledWith(2, 'dummyDefaultValue'); }); }); }); ================================================ FILE: packages/backend/src/utils/JsonWatcher.ts ================================================ /********************************************************************** * Copyright 
(C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { type Disposable, type FileSystemWatcher, fs, EventEmitter, type Event } from '@podman-desktop/api'; import { promises, existsSync, mkdirSync } from 'node:fs'; import path from 'node:path'; export class JsonWatcher implements Disposable { #fileSystemWatcher: FileSystemWatcher | undefined; private readonly _onEvent = new EventEmitter(); readonly onContentUpdated: Event = this._onEvent.event; constructor( private path: string, private defaultValue: T, ) {} init(): void { try { // we create the parent directory of the watched content // if the parent directory does not exists, the watcher is not initialized properly mkdirSync(path.dirname(this.path), { recursive: true }); // create file system watcher this.#fileSystemWatcher = fs.createFileSystemWatcher(this.path); // Setup listeners this.#fileSystemWatcher.onDidChange(this.onDidChange.bind(this)); this.#fileSystemWatcher.onDidDelete(this.onDidDelete.bind(this)); this.#fileSystemWatcher.onDidCreate(this.onDidCreate.bind(this)); } catch (err: unknown) { console.error(`unable to watch file ${this.path}, changes won't be detected.`, err); } this.requestUpdate(); } private onDidCreate(): void { this.requestUpdate(); } private onDidDelete(): void { this.requestUpdate(); } private onDidChange(): void { this.requestUpdate(); } private requestUpdate(): 
void { this.updateContent().catch((err: unknown) => { console.error('Something went wrong in update content', err); }); } private async updateContent(): Promise { if (!existsSync(this.path)) { this._onEvent.fire(this.defaultValue); return; } try { const data = await promises.readFile(this.path, 'utf-8'); this._onEvent.fire(JSON.parse(data)); } catch (err: unknown) { console.error('Something went wrong JsonWatcher', err); } } dispose(): void { this.#fileSystemWatcher?.dispose(); } } ================================================ FILE: packages/backend/src/utils/Publisher.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { expect, test, vi } from 'vitest'; import { Publisher } from './Publisher'; import type { RpcExtension } from '@shared/messages/MessageProxy'; import { MSG_TASKS_UPDATE } from '@shared/Messages'; import type { Task } from '@shared/models/ITask'; test('ensure publisher properly use getter', async () => { const rpcExtensionMock = { fire: vi.fn().mockResolvedValue(true) } as unknown as RpcExtension; const fakeTasks = ['dummyValue']; const getterMock = vi.fn().mockReturnValue(fakeTasks); const publisher = new Publisher(rpcExtensionMock, MSG_TASKS_UPDATE, getterMock); publisher.notify(); await vi.waitFor(() => { expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_TASKS_UPDATE, fakeTasks); }); expect(getterMock).toHaveBeenCalled(); }); ================================================ FILE: packages/backend/src/utils/Publisher.ts ================================================ /********************************************************************** * Copyright (C) 2024-2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { RpcChannel, RpcExtension } from '@shared/messages/MessageProxy'; export class Publisher { constructor( private rpcExtension: RpcExtension, private channel: RpcChannel, private getter: () => T, ) {} notify(): void { this.rpcExtension.fire(this.channel, this.getter()).catch((err: unknown) => { console.error(`Something went wrong while emitting ${this.channel}: ${String(err)}`); }); } } ================================================ FILE: packages/backend/src/utils/RecipeConstants.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ export const CONFIG_FILENAME = 'ai-lab.yaml'; // pod labels export const POD_LABEL_RECIPE_ID = 'ai-lab-recipe-id'; export const POD_LABEL_MODEL_ID = 'ai-lab-model-id'; export const POD_LABEL_MODEL_PORTS = 'ai-lab-model-ports'; export const POD_LABEL_APP_PORTS = 'ai-lab-application-ports'; // image labels export const IMAGE_LABEL_RECIPE_ID = 'ai-lab-recipe-id'; export const IMAGE_LABEL_APP_PORTS = 'ai-lab-application-ports'; export const IMAGE_LABEL_MODEL_SERVICE = 'ai-lab-model-service'; export const IMAGE_LABEL_APPLICATION_NAME = 'ai-lab-application-name'; ================================================ FILE: packages/backend/src/utils/arch.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { arch } from 'node:os'; const nodeArch2GoArch = new Map([ ['ia32', '386'], ['x64', 'amd64'], ]); export function goarch(): string { const localArch = arch(); return nodeArch2GoArch.get(localArch) ?? 
(localArch as string); } ================================================ FILE: packages/backend/src/utils/catalogUtils.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { test, expect, describe } from 'vitest'; import { CatalogFormat, hasCatalogWrongFormat, isNonNullObject, merge, sanitize, sanitizeCategory, sanitizeModel, sanitizeRecipe, } from './catalogUtils'; // Dummy data for testing const validModel = { id: 'model-1', name: 'Test Model', description: 'A test model', }; const validRecipe = { id: 'recipe-1', name: 'Test Recipe', categories: ['category-1'], description: 'A test recipe', repository: 'http://example.com', readme: 'Readme content', }; const validCategory = { id: 'category-1', name: 'Test Category', description: 'A test category', }; describe('sanitize', () => { test('should adapt object not having any version to CURRENT format', () => { const raw = { recipes: [ { id: 'chatbot', description: 'This is a Streamlit chat demo application.', name: 'ChatBot', repository: 'https://github.com/containers/ai-lab-recipes', ref: 'v1.1.3', icon: 'natural-language-processing', categories: ['natural-language-processing'], basedir: 'recipes/natural_language_processing/chatbot', readme: '', models: 
['hf.instructlab.granite-7b-lab-GGUF', 'hf.instructlab.merlinite-7b-lab-GGUF'], }, ], models: [ { id: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', name: 'Mistral-7B-Instruct-v0.3-Q4_K_M', description: 'Model imported from path\\Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', hw: 'CPU', file: { path: 'path', file: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', size: 4372812000, creation: '2024-06-19T12:14:12.489Z', }, memory: 4372812000, }, ], }; expect(hasCatalogWrongFormat(raw)).toBeTruthy(); const catalog = sanitize(raw); expect(catalog.version).equals(CatalogFormat.CURRENT); expect(catalog.models[0].backend).equals('llama-cpp'); expect(catalog.models[0].name).equals('Mistral-7B-Instruct-v0.3-Q4_K_M'); }); test('should throw if version is different from CURRENT', () => { const raw = { version: '0.5', recipes: [ { id: 'chatbot', description: 'This is a Streamlit chat demo application.', name: 'ChatBot', repository: 'https://github.com/containers/ai-lab-recipes', ref: 'v1.1.3', icon: 'natural-language-processing', categories: ['natural-language-processing'], basedir: 'recipes/natural_language_processing/chatbot', readme: '', recommended: ['hf.instructlab.granite-7b-lab-GGUF', 'hf.instructlab.merlinite-7b-lab-GGUF'], backend: 'llama-cpp', }, ], models: [ { id: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', name: 'Mistral-7B-Instruct-v0.3-Q4_K_M', description: 'Model imported from path\\Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', hw: 'CPU', file: { path: 'path', file: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', size: 4372812000, creation: '2024-06-19T12:14:12.489Z', }, memory: 4372812000, }, ], }; expect(hasCatalogWrongFormat(raw)).toBeFalsy(); expect(() => sanitize(raw)).toThrowError('the catalog is using an invalid version'); }); test('should return sanitized ApplicationCatalog with valid raw object', () => { const raw = { version: '1.0', recipes: [ { id: 'chatbot', description: 'This is a Streamlit chat demo application.', name: 'ChatBot', repository: 'https://github.com/containers/ai-lab-recipes', 
ref: 'v1.1.3', icon: 'natural-language-processing', categories: ['natural-language-processing'], basedir: 'recipes/natural_language_processing/chatbot', readme: '', recommended: ['hf.instructlab.granite-7b-lab-GGUF', 'hf.instructlab.merlinite-7b-lab-GGUF'], backend: 'llama-cpp', languages: ['lang1'], frameworks: ['fw1'], }, ], models: [ { id: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', name: 'Mistral-7B-Instruct-v0.3-Q4_K_M', description: 'Model imported from path\\Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', hw: 'CPU', file: { path: 'path', file: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', size: 4372812000, creation: '2024-06-19T12:14:12.489Z', }, memory: 4372812000, }, ], }; expect(hasCatalogWrongFormat(raw)).toBeFalsy(); const catalog = sanitize(raw); expect(catalog.version).equals(CatalogFormat.CURRENT); expect(catalog.models[0].backend).toBeUndefined(); expect(catalog.models[0].name).equals('Mistral-7B-Instruct-v0.3-Q4_K_M'); expect(catalog.recipes[0].languages).toStrictEqual(['lang1']); expect(catalog.recipes[0].frameworks).toStrictEqual(['fw1']); }); }); describe('merge', () => { test('should merge catalogs correctly', () => { const catalogA = { version: CatalogFormat.CURRENT, models: [{ id: 'model-1', name: 'Model A', description: 'Description A' }], recipes: [ { id: 'recipe-1', name: 'Recipe A', categories: ['cat-1'], description: 'Desc A', repository: 'repo', readme: 'readme', }, ], categories: [{ id: 'cat-1', name: 'Category A', description: 'Desc A' }], }; const catalogB = { version: CatalogFormat.CURRENT, models: [{ id: 'model-2', name: 'Model B', description: 'Description B' }], recipes: [ { id: 'recipe-2', name: 'Recipe B', categories: ['cat-2'], description: 'Desc B', repository: 'repo', readme: 'readme', }, ], categories: [{ id: 'cat-2', name: 'Category B', description: 'Desc B' }], }; const merged = merge(catalogA, catalogB); expect(merged.models).toHaveLength(2); expect(merged.recipes).toHaveLength(2); expect(merged.categories).toHaveLength(2); }); 
test('should throw error on incompatible versions', () => { const catalogA = { version: CatalogFormat.CURRENT, models: [], recipes: [], categories: [] }; const catalogB = { version: CatalogFormat.UNKNOWN, models: [], recipes: [], categories: [] }; expect(() => merge(catalogA, catalogB)).toThrowError('cannot merge incompatible application catalog format.'); }); }); describe('isNonNullObject', () => { test('should return true for non-null objects', () => { expect(isNonNullObject({})).toBe(true); expect(isNonNullObject({ key: 'value' })).toBe(true); }); test('should return false for null or non-object values', () => { expect(isNonNullObject(undefined)).toBe(false); expect(isNonNullObject('string')).toBe(false); expect(isNonNullObject(123)).toBe(false); }); }); describe('sanitizeRecipe', () => { test('undefined object', () => { expect(() => sanitizeRecipe(undefined)).toThrowError('invalid recipe format'); }); test('valid recipe object', () => { expect(sanitizeRecipe(validRecipe)).toEqual(validRecipe); }); test('missing mandatory fields', () => { const invalidRecipe = { ...validRecipe, id: undefined }; expect(() => sanitizeRecipe(invalidRecipe)).toThrowError('invalid recipe format'); }); }); describe('sanitizeModel', () => { test('undefined object', () => { expect(() => sanitizeModel(undefined)).toThrowError('invalid model format'); }); test('valid model object', () => { expect(sanitizeModel(validModel)).toEqual(validModel); }); test('missing mandatory fields', () => { const invalidModel = { ...validModel, id: undefined }; expect(() => sanitizeModel(invalidModel)).toThrowError('invalid model format'); }); }); describe('sanitizeCategory', () => { test('undefined object', () => { expect(() => sanitizeCategory(undefined)).toThrowError('invalid category format'); }); test('valid category object', () => { expect(sanitizeCategory(validCategory)).toEqual(validCategory); }); test('missing mandatory fields', () => { const invalidCategory = { ...validCategory, id: undefined }; 
expect(() => sanitizeCategory(invalidCategory)).toThrowError('invalid category format'); }); }); ================================================ FILE: packages/backend/src/utils/catalogUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog'; import type { Recipe } from '@shared/models/IRecipe'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { Category } from '@shared/models/ICategory'; import type { LocalModelInfo } from '@shared/models/ILocalModelInfo'; export enum CatalogFormat { CURRENT = '1.0', UNKNOWN = 'unknown', } export function sanitize(rawObject: object): ApplicationCatalog { // if there is no version in the user catalog, we try to adapt it automatically to the CURRENT format let raw: object & { version: string }; if (hasCatalogWrongFormat(rawObject)) { raw = adaptToCurrent(rawObject); } else { raw = rawObject as object & { version: string }; } // ensure version is valid if (raw.version !== CatalogFormat.CURRENT) throw new Error('the catalog is using an invalid version'); return { version: raw.version, recipes: 'recipes' in raw && Array.isArray(raw.recipes) ? 
raw.recipes.map(recipe => sanitizeRecipe(recipe)) : [], models: 'models' in raw && Array.isArray(raw.models) ? raw.models.map(model => sanitizeModel(model)) : [], categories: 'categories' in raw && Array.isArray(raw.categories) ? raw.categories.map(category => sanitizeCategory(category)) : [], }; } export function hasCatalogWrongFormat(raw: object): boolean { return ( !('version' in raw) || ('recipes' in raw && Array.isArray(raw.recipes) && !!raw.recipes.find(r => 'models' in r)) ); } function adaptToCurrent(raw: object): object & { version: string } { // for recipes - assume backend is llama-cpp and copy models field as recommended if ('recipes' in raw && Array.isArray(raw.recipes)) { raw.recipes.forEach(recipe => { recipe.backend = recipe.backend ?? 'llama-cpp'; recipe.recommended = recipe.recommended ?? recipe.models ?? []; // Copy models to recommended if not present delete recipe.models; // Clear models to avoid duplication }); } // for models - assume backend is llama-cpp if ('models' in raw && Array.isArray(raw.models)) { raw.models.forEach(model => { model.backend = model.backend ?? 
'llama-cpp'; }); } return { ...raw, version: CatalogFormat.CURRENT, }; } /** * This method merge catalog A and B, and let the b overwrite a on conflict * @param a * @param b */ export function merge(a: ApplicationCatalog, b: ApplicationCatalog): ApplicationCatalog { if (a.version !== b.version) { throw new Error('cannot merge incompatible application catalog format.'); } return { version: a.version, models: [...a.models.filter(model => !b.models.some(mModel => model.id === mModel.id)), ...b.models] as ModelInfo[], recipes: [...a.recipes.filter(recipe => !b.recipes.some(mRecipe => recipe.id === mRecipe.id)), ...b.recipes], categories: [ ...a.categories.filter(category => !b.categories.some(mCategory => category.id === mCategory.id)), ...b.categories, ], }; } export function isNonNullObject(obj: unknown): obj is object { return !!obj && typeof obj === 'object'; } export function isStringRecord(obj: unknown): obj is Record { return ( isNonNullObject(obj) && Object.entries(obj).every(([key, value]) => typeof key === 'string' && typeof value === 'string') ); } export function isStringArray(obj: unknown): obj is Array { return Array.isArray(obj) && obj.every(item => typeof item === 'string'); } export function sanitizeRecipe(recipe: unknown): Recipe { if ( isNonNullObject(recipe) && 'id' in recipe && typeof recipe.id === 'string' && 'name' in recipe && typeof recipe.name === 'string' && 'categories' in recipe && isStringArray(recipe.categories) && 'description' in recipe && typeof recipe.description === 'string' && 'repository' in recipe && typeof recipe.repository === 'string' && 'readme' in recipe && typeof recipe.readme === 'string' ) return { // mandatory fields id: recipe.id, name: recipe.name, categories: recipe.categories, description: recipe.description, repository: recipe.repository, readme: recipe.readme, // optional fields ref: 'ref' in recipe && typeof recipe.ref === 'string' ? 
recipe.ref : undefined, icon: 'icon' in recipe && typeof recipe.icon === 'string' ? recipe.icon : undefined, basedir: 'basedir' in recipe && typeof recipe.basedir === 'string' ? recipe.basedir : undefined, recommended: 'recommended' in recipe && isStringArray(recipe.recommended) ? recipe.recommended : undefined, backend: 'backend' in recipe && typeof recipe.backend === 'string' ? recipe.backend : undefined, languages: 'languages' in recipe && isStringArray(recipe.languages) ? recipe.languages : undefined, frameworks: 'frameworks' in recipe && isStringArray(recipe.frameworks) ? recipe.frameworks : undefined, }; throw new Error('invalid recipe format'); } export function isLocalModelInfo(obj: unknown): obj is LocalModelInfo { return ( isNonNullObject(obj) && 'file' in obj && typeof obj.file === 'string' && 'path' in obj && typeof obj.path === 'string' ); } export function sanitizeModel(model: unknown): ModelInfo { if ( isNonNullObject(model) && 'id' in model && typeof model.id === 'string' && 'name' in model && typeof model.name === 'string' && 'description' in model && typeof model.description === 'string' ) return { // mandatory fields id: model.id, name: model.name, description: model.description, // optional fields registry: 'registry' in model && typeof model.registry === 'string' ? model.registry : undefined, license: 'license' in model && typeof model.license === 'string' ? model.license : undefined, url: 'url' in model && typeof model.url === 'string' ? model.url : undefined, memory: 'memory' in model && typeof model.memory === 'number' ? model.memory : undefined, properties: 'properties' in model && isStringRecord(model.properties) ? model.properties : undefined, sha256: 'sha256' in model && typeof model.sha256 === 'string' ? model.sha256 : undefined, backend: 'backend' in model && typeof model.backend === 'string' ? model.backend : undefined, file: 'file' in model && isLocalModelInfo(model.file) ? { ...model.file, creation: new Date(model.file.creation ?? 
0), } : undefined, }; throw new Error('invalid model format'); } export function sanitizeCategory(category: unknown): Category { if ( isNonNullObject(category) && 'id' in category && typeof category.id === 'string' && 'name' in category && typeof category.name === 'string' && 'description' in category && typeof category.description === 'string' ) return { // mandatory fields id: category.id, name: category.name, description: category.description, }; throw new Error('invalid category format'); } ================================================ FILE: packages/backend/src/utils/downloader.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { EventEmitter, type Event } from '@podman-desktop/api'; import type { BaseEvent } from '../models/baseEvent'; export abstract class Downloader { protected readonly _onEvent = new EventEmitter(); readonly onEvent: Event = this._onEvent.event; completed: boolean = false; protected constructor( protected url: string, protected target: string, ) {} getTarget(): string { return this.target; } abstract perform(id: string): Promise; } ================================================ FILE: packages/backend/src/utils/imagesUtils.spec.ts ================================================ import { expect, test } from 'vitest'; import type { Recipe } from '@shared/models/IRecipe'; import type { ContainerConfig } from '../models/AIConfig'; import { getImageTag } from './imagesUtils'; test('return recipe-container tag if container image prop is not defined', () => { const recipe = { id: 'recipe1', } as Recipe; const container = { name: 'name', } as ContainerConfig; const imageTag = getImageTag(recipe, container); expect(imageTag).equals('recipe1-name:latest'); }); test('return container image prop is defined', () => { const recipe = { id: 'recipe1', } as Recipe; const container = { name: 'name', image: 'quay.io/repo/image:v1', } as ContainerConfig; const imageTag = getImageTag(recipe, container); expect(imageTag).equals('quay.io/repo/image:v1'); }); test('append latest tag to container image prop if it has no tag', () => { const recipe = { id: 'recipe1', } as Recipe; const container = { name: 'name', image: 'quay.io/repo/image', } as ContainerConfig; const imageTag = getImageTag(recipe, container); expect(imageTag).equals('quay.io/repo/image:latest'); }); ================================================ FILE: packages/backend/src/utils/imagesUtils.ts ================================================ 
/********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { Recipe } from '@shared/models/IRecipe'; import type { ContainerConfig } from '../models/AIConfig'; export function getImageTag(recipe: Recipe, container: ContainerConfig): string { let tag = container.image ?? `${recipe.id}-${container.name}`; if (!tag.includes(':')) { tag += ':latest'; } return tag; } ================================================ FILE: packages/backend/src/utils/inferenceUtils.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { vi, test, expect, describe, beforeEach } from 'vitest'; import { withDefaultConfiguration, isTransitioning, parseInferenceType, getInferenceType } from './inferenceUtils'; import { getFreeRandomPort } from './ports'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { InferenceServer, InferenceServerStatus } from '@shared/models/IInference'; import { InferenceType } from '@shared/models/IInference'; import type { ContainerProviderConnectionInfo } from '@shared/models/IContainerConnectionInfo'; vi.mock('./ports', () => ({ getFreeRandomPort: vi.fn(), })); beforeEach(() => { vi.resetAllMocks(); vi.mocked(getFreeRandomPort).mockResolvedValue(8888); }); describe('withDefaultConfiguration', () => { test('zero modelsInfo', async () => { await expect(withDefaultConfiguration({ modelsInfo: [] })).rejects.toThrowError( 'modelsInfo need to contain at least one element.', ); }); test('expect all default values', async () => { const result = await withDefaultConfiguration({ modelsInfo: [{ id: 'dummyId' } as unknown as ModelInfo] }); expect(getFreeRandomPort).toHaveBeenCalledWith('0.0.0.0'); expect(result.port).toBe(8888); expect(result.image).toBe(undefined); expect(result.labels).toStrictEqual({}); expect(result.connection).toBe(undefined); }); test('expect no default values', async () => { const connectionMock = { name: 'Dummy Connection', } as unknown as ContainerProviderConnectionInfo; const result = await withDefaultConfiguration({ modelsInfo: [{ id: 'dummyId' } as unknown as ModelInfo], port: 9999, connection: connectionMock, image: 'random-image', labels: { hello: 'world' }, }); expect(getFreeRandomPort).not.toHaveBeenCalled(); expect(result.port).toBe(9999); expect(result.image).toBe('random-image'); expect(result.labels).toStrictEqual({ hello: 'world' }); expect(result.connection).toBe(connectionMock); }); }); 
test.each(['stopping', 'deleting', 'starting'] as InferenceServerStatus[])( '%s should be a transitioning state', status => { expect( isTransitioning({ status: status, } as unknown as InferenceServer), ).toBeTruthy(); }, ); test.each(['running', 'stopped', 'error'] as InferenceServerStatus[])('%s should be a stable state', status => { expect( isTransitioning({ status: status, } as unknown as InferenceServer), ).toBeFalsy(); }); describe('parseInferenceType', () => { test('undefined argument should return InferenceType.None', () => { expect(parseInferenceType(undefined)).toBe(InferenceType.NONE); }); test('llama-cpp should return the proper InferenceType.LLAMA_CPP', () => { expect(parseInferenceType('llama-cpp')).toBe(InferenceType.LLAMA_CPP); }); }); describe('getInferenceType', () => { test('empty array should return InferenceType.None', () => { expect(getInferenceType([])).toBe(InferenceType.NONE); }); test('single model with undefined backend should return InferenceType.None', () => { expect( getInferenceType([ { backend: undefined, } as unknown as ModelInfo, ]), ).toBe(InferenceType.NONE); }); test('single model with llamacpp backend should return InferenceType.LLAMA_CPP', () => { expect( getInferenceType([ { backend: 'llama-cpp', } as unknown as ModelInfo, ]), ).toBe(InferenceType.LLAMA_CPP); }); test('multiple model with llamacpp backend should return InferenceType.LLAMA_CPP', () => { expect( getInferenceType([ { backend: 'llama-cpp', }, { backend: 'llama-cpp', }, ] as unknown as ModelInfo[]), ).toBe(InferenceType.LLAMA_CPP); }); test('multiple model with different backend should return InferenceType.None', () => { expect( getInferenceType([ { backend: 'llama-cpp', }, { backend: 'whisper-cpp', }, ] as unknown as ModelInfo[]), ).toBe(InferenceType.NONE); }); }); ================================================ FILE: packages/backend/src/utils/inferenceUtils.ts ================================================ 
/********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { containerEngine, type ContainerProviderConnection, type ImageInfo, type ListImagesOptions, type PullEvent, } from '@podman-desktop/api'; import type { CreationInferenceServerOptions, InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import { getFreeRandomPort } from './ports'; import { type InferenceServer, InferenceType } from '@shared/models/IInference'; import type { ModelInfo } from '@shared/models/IModelInfo'; export const LABEL_INFERENCE_SERVER: string = 'ai-lab-inference-server'; /** * Given an image name, it will return the ImageInspectInfo corresponding. Will raise an error if not found. 
* @param connection * @param image * @param callback */ export async function getImageInfo( connection: ContainerProviderConnection, image: string, callback: (event: PullEvent) => void, ): Promise { let imageInfo: ImageInfo | undefined; // Get image inspect imageInfo = ( await containerEngine.listImages({ provider: connection, } as ListImagesOptions) ).find(imageInfo => imageInfo.RepoTags?.some(tag => tag === image)); if (!imageInfo) { try { // Pull image await containerEngine.pullImage(connection, image, callback); // Get image inspect imageInfo = ( await containerEngine.listImages({ provider: connection, } as ListImagesOptions) ).find(imageInfo => imageInfo.RepoTags?.some(tag => tag === image)); } catch (err: unknown) { console.warn('Something went wrong while trying to get image inspect', err); throw err; } } if (imageInfo === undefined) throw new Error(`image ${image} not found.`); return imageInfo; } export async function withDefaultConfiguration( options: CreationInferenceServerOptions, ): Promise { if (options.modelsInfo.length === 0) throw new Error('modelsInfo need to contain at least one element.'); return { port: options.port ?? (await getFreeRandomPort('0.0.0.0')), image: options.image, labels: options.labels ?? {}, modelsInfo: options.modelsInfo, connection: options.connection, inferenceProvider: options.inferenceProvider, gpuLayers: options.gpuLayers ?? 999, }; } export function isTransitioning(server: InferenceServer): boolean { switch (server.status) { case 'deleting': case 'stopping': case 'starting': return true; default: break; } return false; } /** * Given a primitive (string) return the InferenceType enum * @param value */ export function parseInferenceType(value: string | undefined): InferenceType { if (!value) return InferenceType.NONE; return (Object.values(InferenceType) as unknown as string[]).includes(value) ? 
(value as unknown as InferenceType) : InferenceType.NONE; } /** * Let's collect the backend required by the provided models * we only support one backend for all the models, if multiple are provided, NONE will be return */ export function getInferenceType(modelsInfo: ModelInfo[]): InferenceType { const backends: InferenceType[] = modelsInfo.map(info => parseInferenceType(info.backend)); if (new Set(backends).size !== 1) return InferenceType.NONE; return backends[0]; } ================================================ FILE: packages/backend/src/utils/mcpUtils.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp'; import { Experimental_StdioMCPTransport as StdioClientTransport } from '@ai-sdk/mcp/mcp-stdio'; import { type McpClient, type McpServer, McpServerType } from '@shared/models/McpSettings'; export async function toMcpClients(...mcpServers: McpServer[]): Promise { const clients: McpClient[] = []; for (const server of mcpServers) { switch (server.type) { case McpServerType.SSE: clients.push( await createMCPClient({ name: server.name, transport: { type: 'sse', url: server.url, headers: server.headers, }, }), ); break; case McpServerType.STDIO: clients.push( await createMCPClient({ name: server.name, transport: new StdioClientTransport({ command: server.command, args: server.args, }), }), ); break; } } return clients; } ================================================ FILE: packages/backend/src/utils/modelsUtils.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import { process as apiProcess } from '@podman-desktop/api'; import { deleteRemoteModel, getLocalModelFile, getMountPath, getRemoteModelFile, isModelUploaded, MACHINE_BASE_FOLDER, } from './modelsUtils'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { getPodmanCli } from './podman'; import { join, posix } from 'node:path'; vi.mock('@podman-desktop/api', () => { return { process: { exec: vi.fn(), }, }; }); vi.mock('./podman', () => ({ getPodmanCli: vi.fn(), })); beforeEach(() => { vi.resetAllMocks(); vi.mocked(getPodmanCli).mockReturnValue('dummyPodmanCli'); }); describe('getLocalModelFile', () => { test('file in ModelInfo undefined', () => { expect(() => { getLocalModelFile({ id: 'dummyModelId', file: undefined, } as unknown as ModelInfo); }).toThrowError('model is not available locally.'); }); test('should join path with respect to system host', () => { const path = getLocalModelFile({ id: 'dummyModelId', file: { path: 'dummyPath', file: 'dummy.guff', }, } as unknown as ModelInfo); if (process.platform === 'win32') { expect(path).toBe('dummyPath\\dummy.guff'); } else { expect(path).toBe('dummyPath/dummy.guff'); } }); }); describe('getMountPath', () => { const DUMMY_MODEL: ModelInfo = { id: 'dummyModelId', file: undefined, properties: {}, description: '', name: 'dummy-model', }; const DOWNLOADED_MODEL: ModelInfo & { file: { path: string; file: string } } = { ...DUMMY_MODEL, file: { path: 'dummyPath', file: 'dummy.guff', }, }; const UPLOADED_MODEL: ModelInfo & { file: { path: string; file: string } } = { ...DUMMY_MODEL, file: { path: MACHINE_BASE_FOLDER, file: 'dummy.guff', }, }; test('file in ModelInfo undefined', () => { expect(() => { getMountPath(DUMMY_MODEL); }).toThrowError('model is not available locally.'); }); test('should join path with respect to system host', 
() => { const path = getMountPath(DOWNLOADED_MODEL); expect(path).toBe(join(DOWNLOADED_MODEL.file.path, DOWNLOADED_MODEL.file.file)); }); test('uploaded model should use posix for join path', () => { const path = getMountPath(UPLOADED_MODEL); expect(path).toBe(posix.join(MACHINE_BASE_FOLDER, UPLOADED_MODEL.file.file)); }); }); describe('getRemoteModelFile', () => { test('file in ModelInfo undefined', () => { expect(() => { getRemoteModelFile({ id: 'dummyModelId', file: undefined, } as unknown as ModelInfo); }).toThrowError('model is not available locally.'); }); test('should join path using posix', () => { const path = getRemoteModelFile({ id: 'dummyModelId', file: { path: 'dummyPath', file: 'dummy.guff', }, } as unknown as ModelInfo); expect(path).toBe(posix.join(MACHINE_BASE_FOLDER, 'dummyModelId')); }); }); describe('isModelUploaded', () => { test('execute stat on targeted machine', async () => { expect( await isModelUploaded('dummyMachine', { id: 'dummyModelId', file: { path: 'dummyPath', file: 'dummy.guff', }, } as unknown as ModelInfo), ).toBeTruthy(); expect(getPodmanCli).toHaveBeenCalled(); expect(apiProcess.exec).toHaveBeenCalledWith('dummyPodmanCli', [ 'machine', 'ssh', 'dummyMachine', 'stat', expect.anything(), ]); }); }); describe('deleteRemoteModel', () => { test('execute stat on targeted machine', async () => { await deleteRemoteModel('dummyMachine', { id: 'dummyModelId', file: { path: 'dummyPath', file: 'dummy.guff', }, } as unknown as ModelInfo); expect(getPodmanCli).toHaveBeenCalled(); expect(apiProcess.exec).toHaveBeenCalledWith('dummyPodmanCli', [ 'machine', 'ssh', 'dummyMachine', 'rm', '-f', expect.anything(), ]); }); }); ================================================ FILE: packages/backend/src/utils/modelsUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ModelInfo } from '@shared/models/IModelInfo'; import { basename, dirname, join, posix } from 'node:path'; import { getPodmanCli } from './podman'; import { process } from '@podman-desktop/api'; import { escapeSpaces } from './pathUtils'; export const MACHINE_BASE_FOLDER = '/home/user/ai-lab/models'; /** * Given a model info object return the path where is it located locally * @param modelInfo */ export function getLocalModelFile(modelInfo: ModelInfo): string { if (modelInfo.file === undefined) throw new Error('model is not available locally.'); return join(modelInfo.file.path, modelInfo.file.file); } /** * Return the path to mount where the model is located * @param modelInfo */ export function getMountPath(modelInfo: ModelInfo): string { if (modelInfo.file === undefined) throw new Error('model is not available locally.'); // if the model is uploaded we need to use posix join if (modelInfo.file.path.startsWith(MACHINE_BASE_FOLDER)) { return posix.join(modelInfo.file.path, modelInfo.file.file); } return join(modelInfo.file.path, modelInfo.file.file); } /** * Given a model info object return the theoretical path where the model * should be in the podman machine * @param modelInfo */ export function getRemoteModelFile(modelInfo: ModelInfo): string { if (modelInfo.file === undefined) throw new Error('model is not 
available locally.'); return posix.join(MACHINE_BASE_FOLDER, modelInfo.id); } export interface ModelMountInfo { mount: string; suffix?: string; } export function getHuggingFaceModelMountInfo(modelInfo: ModelInfo): ModelMountInfo { const localPath = getLocalModelFile(modelInfo); const mountPath = getMountPath(modelInfo); if (mountPath !== localPath) { return { mount: mountPath, }; } else { const snapShotsFolder = dirname(localPath); const commitHash = basename(localPath); const modelFolder = dirname(snapShotsFolder); const snapShots = basename(snapShotsFolder); return { mount: modelFolder, suffix: `${snapShots}/${commitHash}`, }; } } /** * utility method to determine if a model is already uploaded to the podman machine * @param machine * @param modelInfo */ export async function isModelUploaded(machine: string, modelInfo: ModelInfo): Promise { try { const remotePath = escapeSpaces(getRemoteModelFile(modelInfo)); await process.exec(getPodmanCli(), ['machine', 'ssh', machine, 'stat', remotePath]); return true; } catch (err: unknown) { console.error('Something went wrong while trying to stat remote model path', err); return false; } } /** * Given a machine and a modelInfo, delete the corresponding file on the podman machine * @param machine the machine to target * @param modelInfo the model info */ export async function deleteRemoteModel(machine: string, modelInfo: ModelInfo): Promise { try { const remotePath = getRemoteModelFile(modelInfo); await process.exec(getPodmanCli(), ['machine', 'ssh', machine, 'rm', '-f', remotePath]); } catch (err: unknown) { console.error('Something went wrong while trying to stat remote model path', err); } } export function getModelPropertiesForEnvironment(modelInfo: ModelInfo): string[] { const envs: string[] = []; if (modelInfo.properties) { envs.push( ...Object.entries(modelInfo.properties).map(([key, value]) => { const formattedKey = key.replace(/[A-Z]/g, m => `_${m}`).toUpperCase(); return `MODEL_${formattedKey}=${value}`; }), ); } 
return envs; } ================================================ FILE: packages/backend/src/utils/pathUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import path from 'node:path'; export function getParentDirectory(filePath: string): string { // Normalize the path to handle different platform-specific separators const normalizedPath = path.normalize(filePath); // Get the directory name using path.dirname return path.dirname(normalizedPath); } export function escapeSpaces(path: string): string { return path.replace(/ /g, '\\ '); } ================================================ FILE: packages/backend/src/utils/podman.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, describe, vi } from 'vitest'; import * as podmanDesktopApi from '@podman-desktop/api'; import * as utils from '../utils/podman'; import type { ProviderConnectionStatus } from '@podman-desktop/api'; const mocks = vi.hoisted(() => { return { getConfigurationMock: vi.fn(), getContainerConnectionsMock: vi.fn(), }; }); const config: podmanDesktopApi.Configuration = { get: mocks.getConfigurationMock, has: () => true, update: () => Promise.resolve(), }; vi.mock('@podman-desktop/api', () => { return { env: { isWindows: false, isLinux: false, }, configuration: { getConfiguration: (): unknown => config, }, containerEngine: { info: vi.fn(), }, navigation: { navigateToResources: vi.fn(), }, provider: { getContainerConnections: mocks.getContainerConnectionsMock, }, process: { exec: vi.fn(), }, }; }); beforeEach(() => { vi.resetAllMocks(); }); describe('getPodmanCli', () => { test('should return custom binary path if setting is set', () => { mocks.getConfigurationMock.mockReturnValue('binary'); const result = utils.getPodmanCli(); expect(result).equals('binary'); }); test('should return exe file if on windows', () => { vi.mocked(podmanDesktopApi.env).isWindows = true; mocks.getConfigurationMock.mockReturnValue(undefined); const result = utils.getPodmanCli(); expect(result).equals('podman.exe'); }); test('should return podman file if not on windows', () => { vi.mocked(podmanDesktopApi.env).isWindows = false; mocks.getConfigurationMock.mockReturnValue(undefined); const result = utils.getPodmanCli(); expect(result).equals('podman'); }); }); describe('getPodmanConnection', () => { test('throw error if there is no podman connection with name', () => { mocks.getContainerConnectionsMock.mockReturnValue([ { connection: { name: 'Podman 
Machine', status: (): ProviderConnectionStatus => 'started', endpoint: { socketPath: '/endpoint.sock', }, type: 'podman', }, providerId: 'podman', }, ]); expect(() => utils.getPodmanConnection('sample')).toThrowError('no podman connection found with name sample'); }); test('return connection with specified name', () => { mocks.getContainerConnectionsMock.mockReturnValue([ { connection: { name: 'Podman Machine', status: (): ProviderConnectionStatus => 'started', endpoint: { socketPath: '/endpoint.sock', }, type: 'podman', }, providerId: 'podman', }, ]); const engine = utils.getPodmanConnection('Podman Machine'); expect(engine).toBeDefined(); expect(engine.providerId).equals('podman'); expect(engine.connection.name).equals('Podman Machine'); }); }); ================================================ FILE: packages/backend/src/utils/podman.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerProviderConnection, ProviderContainerConnection } from '@podman-desktop/api'; import { configuration, env, provider } from '@podman-desktop/api'; export const MIN_CPUS_VALUE = 4; export type MachineJSON = { Name: string; CPUs: number; Memory: string; DiskSize: string; Running: boolean; Starting: boolean; Default: boolean; UserModeNetworking?: boolean; VMType?: string; }; export function getPodmanCli(): string { // If we have a custom binary path regardless if we are running Windows or not const customBinaryPath = getCustomBinaryPath(); if (customBinaryPath) { return customBinaryPath; } if (env.isWindows) { return 'podman.exe'; } return 'podman'; } // Get the Podman binary path from configuration podman.binary.path // return string or undefined export function getCustomBinaryPath(): string | undefined { return configuration.getConfiguration('podman').get('binary.path'); } /** * In the ${link ContainerProviderConnection.name} property the name is not usage, and we need to transform it * @param connection */ export function getPodmanMachineName(connection: ContainerProviderConnection): string { const runningConnectionName = connection.name; if (runningConnectionName.startsWith('Podman Machine')) { const machineName = runningConnectionName.replace(/Podman Machine\s*/, 'podman-machine-'); if (machineName.endsWith('-')) { return `${machineName}default`; } return machineName; } else { return runningConnectionName; } } /** * @deprecated uses {@link PodmanConnection.getContainerProviderConnection} */ export function getPodmanConnection(connectionName: string): ProviderContainerConnection { const engine = provider .getContainerConnections() .filter(connection => connection.connection.type === 'podman') .find(connection => connection.connection.name === connectionName); if (!engine) { throw new Error(`no podman connection found with name 
${connectionName}`); } return engine; } ================================================ FILE: packages/backend/src/utils/podsUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { PodHealth } from '@shared/models/IApplicationState'; export function getPodHealth(infos: (string | undefined)[]): PodHealth { const checked = infos.filter(info => !!info && info !== 'none' && info !== ''); if (!checked.length) { return 'none'; } if (infos.some(info => info === 'unhealthy')) { return 'unhealthy'; } if (infos.some(info => info === 'starting')) { return 'starting'; } return 'healthy'; } ================================================ FILE: packages/backend/src/utils/ports.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import * as net from 'node:net'; export async function getFreeRandomPort(address: string): Promise { const server = net.createServer(); return new Promise((resolve, reject) => server .on('error', (error: NodeJS.ErrnoException) => reject(error)) .on('listening', () => { const addr = server.address(); if (typeof addr === 'string') { // this should not happen, as it is only for pipes and unix domain sockets server.close(() => reject(new Error('error getting allocated port'))); } else if (addr) { // not sure what the call to close will do on the addr value // => the port value is saved before to call close const allocatedPort = addr.port; server.close(() => resolve(allocatedPort)); } else { reject(new Error('invalid server address')); } }) .listen(0, address), ); } export async function getPortsInfo(portDescriptor: string): Promise { const localPort = await getPort(portDescriptor); if (!localPort) { return undefined; } return `${localPort}`; } async function getPort(portDescriptor: string): Promise { let port: number; if (portDescriptor.endsWith('/tcp') || portDescriptor.endsWith('/udp')) { port = parseInt(portDescriptor.substring(0, portDescriptor.length - 4)); } else { port = parseInt(portDescriptor); } // invalid port if (isNaN(port)) { return Promise.resolve(undefined); } try { return await getFreeRandomPort('0.0.0.0'); } catch (e) { console.error(e); return undefined; } } export function getPortsFromLabel(labels: { [key: string]: string }, key: string): 
number[] { if (!(key in labels)) { return []; } const value = labels[key]; const portsStr = value.split(','); const result: number[] = []; for (const portStr of portsStr) { const port = parseInt(portStr, 10); if (isNaN(port)) { // malformed label, just ignore it return []; } result.push(port); } return result; } ================================================ FILE: packages/backend/src/utils/randomUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ export const getRandomString = (): string => { // eslint-disable-next-line sonarjs/pseudo-random return (Math.random() + 1).toString(36).substring(7); }; export function getRandomName(prefix: string): string { return `${prefix ?? ''}-${new Date().getTime()}`; } ================================================ FILE: packages/backend/src/utils/sha.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, vi, describe } from 'vitest'; import * as fs from 'node:fs'; import { getHash, hasValidSha } from './sha'; import { Readable } from 'node:stream'; beforeEach(() => { vi.resetAllMocks(); }); test('return true if file has same hash of the expected one', () => { vi.mock('node:fs'); const readable = Readable.from('test'); vi.spyOn(fs, 'createReadStream').mockImplementation(() => { return readable as fs.ReadStream; }); // sha of test => 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 const isValid = hasValidSha('file', '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'); expect(isValid).toBeTruthy(); }); test('return false if file has different hash of the expected one', () => { vi.mock('node:fs'); const readable = Readable.from('test'); vi.spyOn(fs, 'createReadStream').mockImplementation(() => { return readable as fs.ReadStream; }); // sha of test => 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 const isValid = hasValidSha('file', 'fakeSha'); expect(isValid).toBeTruthy(); }); describe('sha512', () => { test('basic string', () => { const result = getHash('hello-world'); expect(result).toBe( '6aeefc29122a3962c90ef834f6caad0033bffcd62941b7a6205a695cc39e2767db7778a7ad76d173a083b9e14b210dc0212923f481b285c784ab1fe340d7ff4d', ); }); test('very long string', () => { const result = getHash('x'.repeat(1024)); expect(result).toBe( 
'fa41ec783342d4c23e7b6550f1e96e32a16269e390449e5fdda60f05611ecb08dd56a5b8cde90024b7da934cdb9a9cc8c8a310eb20e25227699bbf6518e23360', ); }); }); ================================================ FILE: packages/backend/src/utils/sha.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import crypto from 'node:crypto'; import * as fs from 'node:fs'; import { promises } from 'node:stream'; export async function hasValidSha(filePath: string, expectedSha: string): Promise { const checkSum = crypto.createHash('sha256'); const input = fs.createReadStream(filePath); await promises.pipeline(input, checkSum); const actualSha = checkSum.digest('hex'); return actualSha === expectedSha; } export function getHash(content: string): string { return crypto.createHash('sha512').update(content).digest('hex'); } ================================================ FILE: packages/backend/src/utils/uploader.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { expect, test, describe, vi } from 'vitest'; import { WSLUploader } from '../workers/uploader/WSLUploader'; import * as podmanDesktopApi from '@podman-desktop/api'; import { beforeEach } from 'node:test'; import { Uploader } from './uploader'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type { ContainerProviderConnection } from '@podman-desktop/api'; import { VMType } from '@shared/models/IPodman'; vi.mock('@podman-desktop/api', async () => { return { env: { isWindows: false, }, process: { exec: vi.fn(), }, EventEmitter: vi.fn().mockImplementation(() => { return { fire: vi.fn(), }; }), }; }); const connectionMock: ContainerProviderConnection = { name: 'machine2', type: 'podman', status: () => 'started', vmType: VMType.WSL, endpoint: { socketPath: 'socket.sock', }, }; const uploader = new Uploader(connectionMock, { id: 'dummyModelId', file: { file: 'dummyFile.guff', path: 'localpath', }, } as unknown as ModelInfo); beforeEach(() => { vi.resetAllMocks(); }); describe('perform', () => { test('should return localModelPath if no workers for current system', async () => { vi.mocked(podmanDesktopApi.env).isWindows = false; const result = await uploader.perform('id'); expect(result.startsWith('localpath')).toBeTruthy(); }); test('should return remote path if there is a worker for current system', async () => { vi.spyOn(WSLUploader.prototype, 'perform').mockResolvedValue('remote'); vi.mocked(podmanDesktopApi.env).isWindows = true; 
const result = await uploader.perform('id'); expect(result).toBe('remote'); }); }); ================================================ FILE: packages/backend/src/utils/uploader.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { EventEmitter, type Event, type ContainerProviderConnection } from '@podman-desktop/api'; import { WSLUploader } from '../workers/uploader/WSLUploader'; import { getDurationSecondsSince } from './utils'; import type { CompletionEvent, BaseEvent } from '../models/baseEvent'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { getLocalModelFile } from './modelsUtils'; import type { IWorker } from '../workers/IWorker'; import type { UploaderOptions } from '../workers/uploader/UploaderOptions'; export class Uploader { readonly #_onEvent = new EventEmitter(); readonly onEvent: Event = this.#_onEvent.event; readonly #workers: IWorker[] = []; constructor( private connection: ContainerProviderConnection, private modelInfo: ModelInfo, private abortSignal?: AbortSignal, ) { this.#workers = [new WSLUploader()]; } /** * Performing the upload action * @param id tracking id * * @return the path to model after the operation (either on the podman machine or local if not compatible) */ 
async perform(id: string): Promise { // Find the uploader for the current operating system const worker: IWorker | undefined = this.#workers.find(w => w.enabled()); // If none are found, we return the current path if (worker === undefined) { console.warn('There is no workers compatible. Using default local mounting'); this.#_onEvent.fire({ id, status: 'completed', message: `Use local model`, } as CompletionEvent); return getLocalModelFile(this.modelInfo); } try { // measure performance const startTime = performance.now(); // get new path const remotePath = await worker.perform({ connection: this.connection, model: this.modelInfo, }); // compute full time const durationSeconds = getDurationSecondsSince(startTime); // fire events this.#_onEvent.fire({ id, status: 'completed', message: `Duration ${durationSeconds}s.`, duration: durationSeconds, } as CompletionEvent); // return the new path on the podman machine return remotePath; } catch (err) { if (!this.abortSignal?.aborted) { this.#_onEvent.fire({ id, status: 'error', message: `Something went wrong: ${String(err)}.`, }); } else { this.#_onEvent.fire({ id, status: 'canceled', message: `Request cancelled: ${String(err)}.`, }); } throw new Error(`Unable to upload model. Error: ${String(err)}`); } } } ================================================ FILE: packages/backend/src/utils/urldownloader.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { vi, test, expect, beforeEach } from 'vitest';
import { EventEmitter } from '@podman-desktop/api';
import { createWriteStream, existsSync, type WriteStream } from 'node:fs';
import { rename, rm } from 'node:fs/promises';
import https, { type RequestOptions } from 'node:https';
import type { ClientRequest, IncomingMessage } from 'node:http';
import { URLDownloader } from './urldownloader';

// Replace the podman-desktop EventEmitter with a mock (re-wired in beforeEach).
vi.mock('@podman-desktop/api', () => {
  return {
    EventEmitter: vi.fn(),
  };
});

// Stub the network layer: tests drive the captured response callback manually.
vi.mock('node:https', () => {
  return {
    default: {
      get: vi.fn(),
    },
  };
});

// Stub the filesystem so no real files are written or removed.
vi.mock('node:fs', () => {
  return {
    createWriteStream: vi.fn(),
    existsSync: vi.fn(),
  };
});

vi.mock('node:fs/promises', () => {
  return {
    rename: vi.fn(),
    rm: vi.fn(),
  };
});

beforeEach(() => {
  vi.resetAllMocks();
  // Minimal synchronous EventEmitter: fire() invokes every registered listener.
  const listeners: ((value: unknown) => void)[] = [];

  vi.mocked(EventEmitter).mockReturnValue({
    event: vi.fn().mockImplementation(callback => {
      listeners.push(callback);
    }),
    fire: vi.fn().mockImplementation((content: unknown) => {
      listeners.forEach(listener => listener(content));
    }),
  } as unknown as EventEmitter);

  vi.mocked(rm).mockResolvedValue(undefined);
  vi.mocked(rename).mockResolvedValue(undefined);
});

test('Downloader constructor', async () => {
  const downloader = new URLDownloader('dummyUrl', 'dummyTarget');
  expect(downloader.getTarget()).toBe('dummyTarget');
});

test('perform download failed', async () => {
  const downloader = new URLDownloader('dummyUrl', 'dummyTarget');

  // Capture the https.get response callback so the test can invoke it later.
  let onResponse: ((msg: IncomingMessage) => void) | undefined;
  vi.mocked(
    https.get as (url: string | URL, options: RequestOptions, callback: (_: IncomingMessage) => void) => ClientRequest,
  ).mockImplementation((_url, _options, callback) => {
    onResponse = callback;
    return {} as unknown as ClientRequest;
  });

  const closeMock = vi.fn();
  const onMock = vi.fn();
  vi.mocked(createWriteStream).mockReturnValue({
    close: closeMock,
    on: onMock,
  } as unknown as WriteStream);
  vi.mocked(existsSync).mockReturnValue(true);

  // Simulate a write-stream failure as soon as the error handler is attached.
  onMock.mockImplementation((event: string, callback: (err: Error) => void) => {
    if (event === 'error') {
      callback(new Error('dummyError'));
    }
  });

  // capture downloader event(s)
  const listenerMock = vi.fn();
  downloader.onEvent(listenerMock);

  const rejectSpy = vi.fn();
  // perform download logic (do not wait)
  downloader.perform('followUpId').catch((e: unknown) => rejectSpy(e));

  // wait for listener to be registered
  await vi.waitFor(() => {
    expect(onResponse).toBeDefined();
  });
  if (onResponse === undefined) throw new Error('onResponse undefined');

  // Feed a non-redirect response so the download path is exercised.
  onResponse({
    pipe: vi.fn(),
    on: vi.fn(),
    headers: { location: undefined },
  } as unknown as IncomingMessage);

  await vi.waitFor(() => {
    expect(downloader.completed).toBeTruthy();
  });

  // The failure must be reported as an error event, the tmp file removed,
  // and the perform() promise rejected.
  expect(listenerMock).toHaveBeenCalledWith({
    id: 'followUpId',
    message: 'Something went wrong: dummyError.',
    status: 'error',
  });
  expect(rm).toHaveBeenCalledWith('dummyTarget.tmp');
  expect(rejectSpy).toHaveBeenCalledWith('dummyError');
});

test('perform download successfully', async () => {
  const downloader = new URLDownloader('dummyUrl', 'dummyTarget');

  // Capture the https.get response callback so the test can invoke it later.
  let onResponse: ((msg: IncomingMessage) => void) | undefined;
  vi.mocked(
    https.get as (url: string | URL, options: RequestOptions, callback: (_: IncomingMessage) => void) => ClientRequest,
  ).mockImplementation((_url, _options, callback) => {
    onResponse = callback;
    return {} as unknown as ClientRequest;
  });

  const closeMock = vi.fn();
  const onMock = vi.fn();
  vi.mocked(createWriteStream).mockReturnValue({
    close: closeMock,
    on: onMock,
  } as unknown as WriteStream);
  vi.mocked(existsSync).mockReturnValue(true);

  // Complete the write stream immediately when the finish handler is attached.
  onMock.mockImplementation((event: string, callback: () => void) => {
    if (event === 'finish') {
      callback();
    }
  });

  // capture downloader event(s)
  const listenerMock = vi.fn();
  downloader.onEvent(listenerMock);

  // perform download logic
  downloader.perform('followUpId').catch((err: unknown) => console.error(err));

  // wait for listener to be registered
  await vi.waitFor(() => {
    expect(onResponse).toBeDefined();
  });
  if (onResponse === undefined) throw new Error('onResponse undefined');

  // Feed a non-redirect response so the download path is exercised.
  onResponse({
    pipe: vi.fn(),
    on: vi.fn(),
    headers: { location: undefined },
  } as unknown as IncomingMessage);

  await vi.waitFor(() => {
    expect(downloader.completed).toBeTruthy();
  });

  // The tmp file must be promoted to the final target and a completion event fired.
  expect(rename).toHaveBeenCalledWith('dummyTarget.tmp', 'dummyTarget');
  expect(downloader.completed).toBeTruthy();
  expect(listenerMock).toHaveBeenCalledWith({
    id: 'followUpId',
    duration: expect.anything(),
    message: expect.anything(),
    status: 'completed',
  });
  expect(rm).not.toHaveBeenCalled();
});

// Subclass only widens getRedirect's visibility for testing.
class DownloaderTest extends URLDownloader {
  public override getRedirect(url: string, location: string): string {
    return super.getRedirect(url, location);
  }
}

const SITE_EXAMPLE = 'https://example.com/hello';
const SITE_DUMMY = 'https://dummy.com/world';

test('redirect should use location if parsable', () => {
  const downloader = new DownloaderTest(SITE_EXAMPLE, '/home/file.guff');
  const result = downloader.getRedirect(SITE_EXAMPLE, SITE_DUMMY);
  expect(result).toBe(SITE_DUMMY);
});

test('redirect should concat base url and location if not parsable', () => {
  const downloader = new DownloaderTest(SITE_EXAMPLE, '/home/file.guff');
  const result = downloader.getRedirect(SITE_EXAMPLE, '/world');
  expect(result).toBe('https://example.com/world');
});

================================================
FILE: packages/backend/src/utils/urldownloader.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { getDurationSecondsSince } from './utils'; import { createWriteStream, existsSync } from 'node:fs'; import { mkdir, rename, rm } from 'node:fs/promises'; import { dirname } from 'node:path'; import crypto from 'node:crypto'; import https from 'node:https'; import type { CompletionEvent, ProgressEvent } from '../models/baseEvent'; import { Downloader } from './downloader'; export class URLDownloader extends Downloader { private requestedIdentifier: string | undefined; constructor( url: string, target: string, private sha256?: string, private abortSignal?: AbortSignal, ) { super(url, target); } async perform(id: string): Promise { //ensure parent folder exists if (!existsSync(dirname(this.target))) { await mkdir(dirname(this.target), { recursive: true }); } this.requestedIdentifier = id; const startTime = performance.now(); try { await this.download(this.url); const durationSeconds = getDurationSecondsSince(startTime); this._onEvent.fire({ id: this.requestedIdentifier, status: 'completed', message: `Duration ${durationSeconds}s.`, duration: durationSeconds, } as CompletionEvent); } catch (err: unknown) { if (!this.abortSignal?.aborted) { this._onEvent.fire({ id: this.requestedIdentifier, status: 'error', message: `Something went wrong: ${String(err)}.`, }); } else { this._onEvent.fire({ id: this.requestedIdentifier, status: 'canceled', message: `Request cancelled: ${String(err)}.`, }); } throw err; } finally { this.completed = true; } } private 
download(url: string): Promise { return new Promise((resolve, reject) => { const callback = (result: { ok?: boolean; error?: string }): void => { if (result.ok) { resolve(); } else { reject(result.error); } }; this.followRedirects(url, callback); }); } /** * This file takes as argument a location, either a full url or a path * if a path is provided, the url will be used as origin. * @param url * @param location * @protected */ protected getRedirect(url: string, location: string): string { if (URL.canParse(location)) return location; const origin = new URL(url).origin; if (URL.canParse(location, origin)) return new URL(location, origin).href; return location; } private followRedirects(url: string, callback: (message: { ok?: boolean; error?: string }) => void): void { const tmpFile = `${this.target}.tmp`; let totalFileSize = 0; let progress = 0; let previousProgressValue = -1; let checkSum: crypto.Hash; if (this.sha256) { checkSum = crypto.createHash('sha256'); } https.get(url, { signal: this.abortSignal }, resp => { // Determine the total size if (resp.headers.location) { const redirect = this.getRedirect(url, resp.headers.location); this.followRedirects(redirect, callback); return; } if (totalFileSize === 0 && resp.headers['content-length']) { totalFileSize = parseFloat(resp.headers['content-length']); } const stream = createWriteStream(tmpFile, { signal: this.abortSignal, }); // Capture potential errors resp.on('error', (err: Error) => { stream.destroy(err); // propagate to stream }); // On data resp.on('data', chunk => { if (checkSum) { checkSum.update(chunk); } progress += chunk.length; const progressValue = (progress * 100) / totalFileSize; // Only fire events for progress greater than 1 if (progressValue === 100 || progressValue - previousProgressValue > 1) { previousProgressValue = progressValue; this._onEvent.fire({ id: this.requestedIdentifier, status: 'progress', value: progressValue, total: totalFileSize, } as ProgressEvent); } }); // Pipe to stream 
resp.pipe(stream); // Handle error case stream.on('error', (err: Error) => { rm(tmpFile) .then(() => { callback({ error: err.message, }); }) .catch((err: unknown) => { console.error(`Something went wrong while trying to delete ${tmpFile}`, err); }); }); // On close event stream.on('finish', () => { // check if _parent_ is errored if (resp.errored) { return; } if (checkSum) { const actualSha = checkSum.digest('hex'); if (this.sha256 !== actualSha) { callback({ error: `The file's security hash (SHA-256) does not match the expected value. The file may have been altered or corrupted during the download process`, }); rm(tmpFile).catch((err: unknown) => { console.error(`Something went wrong while trying to delete ${tmpFile}`, err); }); return; } } // If everything is fine we simply rename the tmp file to the expected one rename(tmpFile, this.target) .then(() => { callback({ ok: true }); }) .catch((err: unknown) => { callback({ error: `Something went wrong while trying to rename downloaded file: ${String(err)}.` }); }); }); }); } } ================================================ FILE: packages/backend/src/utils/utils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import * as http from 'node:http'; export async function timeout(time: number): Promise { return new Promise(resolve => { setTimeout(resolve, time); }); } export async function isEndpointAlive(endPoint: string): Promise { return new Promise(resolve => { const req = http.get(endPoint, res => { res.on('data', () => { // do nothing }); res.on('end', () => { console.log(res); if (res.statusCode === 200) { resolve(true); } else { resolve(false); } }); }); req.once('error', err => { console.log('Error while pinging endpoint', err); resolve(false); }); }); } export function getDurationSecondsSince(startTimeMs: number): number { return Math.round((performance.now() - startTimeMs) / 1000); } export const DISABLE_SELINUX_LABEL_SECURITY_OPTION = 'label=disable'; ================================================ FILE: packages/backend/src/webviewUtils.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, expect, test, vi } from 'vitest'; import { initWebview } from './webviewUtils'; import type { Uri } from '@podman-desktop/api'; import { type PathLike, promises } from 'node:fs'; vi.mock('@podman-desktop/api', async () => { return { Uri: class { static readonly joinPath = (): unknown => ({ fsPath: '.' }); }, window: { createWebviewPanel: (): unknown => ({ webview: { html: '', onDidReceiveMessage: vi.fn(), postMessage: vi.fn(), asWebviewUri: () => 'dummy-src', }, onDidChangeViewState: vi.fn(), }), }, }; }); vi.mock('node:fs', () => ({ promises: { readFile: vi.fn(), }, })); beforeEach(() => { vi.resetAllMocks(); }); test('panel should have file content as html', async () => { vi.mocked(promises.readFile as (path: PathLike) => Promise).mockImplementation(() => { return Promise.resolve(''); }); const panel = await initWebview({} as unknown as Uri); expect(panel.webview.html).toBe(''); }); test('script src should be replaced with asWebviewUri result', async () => { vi.mocked(promises.readFile as (path: PathLike) => Promise).mockImplementation(() => { return Promise.resolve(''); }); const panel = await initWebview({} as unknown as Uri); expect(panel.webview.html).toBe(''); }); test('links src should be replaced with asWebviewUri result', async () => { vi.mocked(promises.readFile as (path: PathLike) => Promise).mockImplementation(() => { return Promise.resolve(''); }); const panel = await initWebview({} as unknown as Uri); expect(panel.webview.html).toBe(''); }); ================================================ FILE: packages/backend/src/webviewUtils.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

import { Uri, type WebviewOptions, type WebviewPanel, window } from '@podman-desktop/api';
import { promises } from 'node:fs';

// Options for the AI Lab webview panel: content loading is restricted to the
// extension's `media` directory.
function getWebviewOptions(extensionUri: Uri): WebviewOptions {
  return {
    // Enable javascript in the webview
    // enableScripts: true,

    // And restrict the webview to only loading content from our extension's `media` directory.
    localResourceRoots: [Uri.joinPath(extensionUri, 'media')],
  };
}

// Create the AI Lab webview panel and load media/index.html into it, rewriting
// asset references to webview URIs via panel.webview.asWebviewUri.
// NOTE(review): the return type reads bare `Promise` and both match() calls
// read `//g` — the generic argument (presumably `Promise<WebviewPanel>`) and
// the regex bodies (presumably matching <script …> and <link …> tags) appear
// stripped by extraction. Confirm against the upstream file; the code below is
// preserved byte-for-byte as extracted.
export async function initWebview(extensionUri: Uri): Promise {
  // register webview
  const panel = window.createWebviewPanel('studio', 'AI Lab', getWebviewOptions(extensionUri));

  // update html
  const indexHtmlUri = Uri.joinPath(extensionUri, 'media', 'index.html');
  const indexHtmlPath = indexHtmlUri.fsPath;
  let indexHtml = await promises.readFile(indexHtmlPath, 'utf8');

  // replace links with webView Uri links
  // in the content replace src with webview.asWebviewUri
  // eslint-disable-next-line sonarjs/slow-regex
  const scriptLink = indexHtml.match(//g);
  if (scriptLink) {
    scriptLink.forEach(link => {
      // extract the original src value from the matched tag
      const src = RegExp(/src="(.*?)"/).exec(link);
      if (src) {
        const webviewSrc = panel.webview.asWebviewUri(Uri.joinPath(extensionUri, 'media', src[1]));
        if (!webviewSrc) throw new Error('undefined webviewSrc');
        indexHtml = indexHtml.replace(src[1], webviewSrc.toString());
      }
    });
  }

  // and now replace for css file as well
  // eslint-disable-next-line sonarjs/slow-regex
  const cssLink = indexHtml.match(//g);
  if (cssLink) {
    cssLink.forEach(link => {
      // extract the original href value from the matched tag
      const href = RegExp(/href="(.*?)"/).exec(link);
      if (href) {
        const webviewHref = panel.webview.asWebviewUri(Uri.joinPath(extensionUri, 'media', href[1]));
        if (!webviewHref)
          throw new Error('Something went wrong while replacing links with webView Uri links: undefined webviewHref');
        indexHtml = indexHtml.replace(href[1], webviewHref.toString());
      }
    });
  }

  panel.webview.html = indexHtml;
  return panel;
}

================================================
FILE: packages/backend/src/workers/IWorker.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/

// Contract for platform-gated workers: `enabled()` reports availability and
// `perform` executes the work.
// NOTE(review): this interface references `T` without declaring it — the
// generic parameter list (likely `IWorker<T, R>` with `perform(args: T):
// Promise<R>`) appears stripped by extraction; confirm against upstream.
export interface IWorker {
  enabled(): boolean;
  perform(args: T): Promise;
}

================================================
FILE: packages/backend/src/workers/WindowsWorker.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { env } from '@podman-desktop/api'; import type { IWorker } from './IWorker'; export abstract class WindowsWorker implements IWorker { enabled(): boolean { return env.isWindows; } abstract perform(content: T): Promise; } ================================================ FILE: packages/backend/src/workers/provider/InferenceProvider.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/

import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import { type BetterContainerCreateResult, InferenceProvider } from './InferenceProvider';
import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig';
import type { ContainerCreateOptions, ContainerProviderConnection, ImageInfo } from '@podman-desktop/api';
import { containerEngine } from '@podman-desktop/api';
import { getImageInfo } from '../../utils/inferenceUtils';
import type { TaskState } from '@shared/models/ITask';
import type { InferenceServer } from '@shared/models/IInference';
import { InferenceType } from '@shared/models/IInference';

// Mock only what the provider touches: image resolution and container creation.
vi.mock('../../utils/inferenceUtils', () => ({
  getImageInfo: vi.fn(),
  LABEL_INFERENCE_SERVER: 'ai-lab-inference-server',
}));

vi.mock('@podman-desktop/api', () => ({
  containerEngine: {
    createContainer: vi.fn(),
  },
}));

const DummyImageInfo: ImageInfo = {
  Id: 'dummy-image-id',
  engineId: 'dummy-engine-id',
} as unknown as ImageInfo;

const taskRegistry: TaskRegistry = {
  createTask: vi.fn(),
  updateTask: vi.fn(),
} as unknown as TaskRegistry;

const connectionMock: ContainerProviderConnection = {
  name: 'Dummy Connection',
  type: 'podman',
} as unknown as ContainerProviderConnection;

// Concrete subclass exposing the protected pullImage/createContainer helpers.
// NOTE(review): several return types below read bare `Promise` — the generic
// arguments (likely ImageInfo / BetterContainerCreateResult / InferenceServer)
// appear stripped by extraction; confirm against upstream.
class TestInferenceProvider extends InferenceProvider {
  constructor() {
    super(taskRegistry, InferenceType.NONE, 'test-inference-provider');
  }

  enabled(): boolean {
    throw new Error('not implemented');
  }

  publicPullImage(
    connection: ContainerProviderConnection,
    image: string,
    labels: { [id: string]: string },
  ): Promise {
    return super.pullImage(connection, image, labels);
  }

  async publicCreateContainer(
    engineId: string,
    containerCreateOptions: ContainerCreateOptions,
    labels: { [id: string]: string } = {},
  ): Promise {
    const result = await this.createContainer(engineId, containerCreateOptions, labels);
    return {
      id: result.id,
      engineId: engineId,
    };
  }

  async perform(_config: InferenceServerConfig): Promise {
    throw new Error('not implemented');
  }

  dispose(): void {}
}

beforeEach(() => {
  vi.resetAllMocks();
  vi.mocked(getImageInfo).mockResolvedValue(DummyImageInfo);
  // createTask echoes its arguments back under a fixed id, so the later
  // updateTask assertions can verify the exact task payload.
  vi.mocked(taskRegistry.createTask).mockImplementation(
    (name: string, state: TaskState, labels: { [id: string]: string } = {}) => ({
      id: 'dummy-task-id',
      name: name,
      state: state,
      labels: labels,
    }),
  );
  vi.mocked(containerEngine.createContainer).mockResolvedValue({
    id: 'dummy-container-id',
    engineId: 'dummy-engine-id',
  });
});

describe('pullImage', () => {
  test('should create a task and mark as success on completion', async () => {
    const provider = new TestInferenceProvider();
    await provider.publicPullImage(connectionMock, 'dummy-image', {
      key: 'value',
    });
    expect(taskRegistry.createTask).toHaveBeenCalledWith('Pulling dummy-image.', 'loading', {
      key: 'value',
    });
    expect(taskRegistry.updateTask).toHaveBeenCalledWith({
      id: 'dummy-task-id',
      name: 'Pulling dummy-image.',
      labels: {
        key: 'value',
      },
      state: 'success',
    });
  });

  test('should mark the task as error when pulling failed', async () => {
    const provider = new TestInferenceProvider();
    vi.mocked(getImageInfo).mockRejectedValue(new Error('dummy test error'));
    await expect(
      provider.publicPullImage(connectionMock, 'dummy-image', {
        key: 'value',
      }),
    ).rejects.toThrowError('dummy test error');
    expect(taskRegistry.updateTask).toHaveBeenCalledWith({
      id: 'dummy-task-id',
      name: 'Pulling dummy-image.',
      labels: {
        key: 'value',
      },
      state: 'error',
      error: 'Something went wrong while pulling dummy-image: Error: dummy test error',
    });
  });
});

describe('createContainer', () => {
  test('should create a task and mark as success on completion', async () => {
    const provider = new TestInferenceProvider();
    await provider.publicCreateContainer(
      'dummy-engine-id',
      {
        name: 'dummy-container-name',
      },
      {
        key: 'value',
      },
    );
    expect(taskRegistry.createTask).toHaveBeenCalledWith('Creating container.', 'loading', {
      key: 'value',
    });
    expect(taskRegistry.updateTask).toHaveBeenCalledWith({
      id: 'dummy-task-id',
      name: 'Creating container.',
      labels: {
        key: 'value',
      },
      state: 'success',
    });
  });

  test('should mark the task as error when creation failed', async () => {
    const provider = new TestInferenceProvider();
    vi.mocked(containerEngine.createContainer).mockRejectedValue(new Error('dummy test error'));
    await expect(
      provider.publicCreateContainer(
        'dummy-provider-id',
        {
          name: 'dummy-container-name',
        },
        {
          key: 'value',
        },
      ),
    ).rejects.toThrowError('dummy test error');
    expect(taskRegistry.updateTask).toHaveBeenCalledWith({
      id: 'dummy-task-id',
      name: 'Creating container.',
      labels: {
        key: 'value',
      },
      state: 'error',
      error: 'Something went wrong while creating container: Error: dummy test error',
    });
  });
});

================================================
FILE: packages/backend/src/workers/provider/InferenceProvider.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerCreateOptions, ContainerCreateResult, ContainerProviderConnection, Disposable, ImageInfo, PullEvent, } from '@podman-desktop/api'; import { containerEngine } from '@podman-desktop/api'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import type { IWorker } from '../IWorker'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { getImageInfo } from '../../utils/inferenceUtils'; import type { InferenceServer, InferenceType } from '@shared/models/IInference'; export type BetterContainerCreateResult = ContainerCreateResult & { engineId: string }; export abstract class InferenceProvider implements IWorker, Disposable { readonly type: InferenceType; readonly name: string; protected constructor( private taskRegistry: TaskRegistry, type: InferenceType, name: string, ) { this.type = type; this.name = name; } abstract enabled(): boolean; prePerform(_config: InferenceServerConfig): Promise { return Promise.resolve(); } abstract perform(config: InferenceServerConfig): Promise; abstract dispose(): void; protected async createContainer( engineId: string, containerCreateOptions: ContainerCreateOptions, labels: { [id: string]: string }, ): Promise { const containerTask = this.taskRegistry.createTask(`Creating container.`, 'loading', labels); try { const result = await containerEngine.createContainer(engineId, containerCreateOptions); // update the task containerTask.state = 'success'; containerTask.progress = undefined; // return the ContainerCreateResult return { id: result.id, engineId: engineId, }; } catch (err: unknown) { containerTask.state = 'error'; containerTask.progress = undefined; containerTask.error = `Something went wrong while creating container: ${String(err)}`; throw err; } finally { this.taskRegistry.updateTask(containerTask); } } /** * This method allows to pull the image, 
while creating a task for the user to follow progress * @param connection * @param image * @param labels * @protected */ protected pullImage( connection: ContainerProviderConnection, image: string, labels: { [id: string]: string }, ): Promise { // Creating a task to follow pulling progress const pullingTask = this.taskRegistry.createTask(`Pulling ${image}.`, 'loading', labels); // get the default image info for this provider return getImageInfo(connection, image, (_event: PullEvent) => {}) .catch((err: unknown) => { pullingTask.state = 'error'; pullingTask.progress = undefined; pullingTask.error = `Something went wrong while pulling ${image}: ${String(err)}`; throw err; }) .then(imageInfo => { pullingTask.state = 'success'; pullingTask.progress = undefined; return imageInfo; }) .finally(() => { this.taskRegistry.updateTask(pullingTask); }); } } ================================================ FILE: packages/backend/src/workers/provider/LlamaCppPython.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { beforeEach, describe, expect, test, vi } from 'vitest'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { LlamaCppPython, SECOND } from './LlamaCppPython'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { getImageInfo, LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import type { ContainerProviderConnection, ImageInfo } from '@podman-desktop/api'; import { containerEngine } from '@podman-desktop/api'; import type { GPUManager } from '../../managers/GPUManager'; import type { PodmanConnection } from '../../managers/podmanConnection'; import { VMType } from '@shared/models/IPodman'; import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry'; import { GPUVendor } from '@shared/models/IGPUInfo'; import type { InferenceServer } from '@shared/models/IInference'; import { InferenceType } from '@shared/models/IInference'; import { llamacpp } from '../../assets/inference-images.json'; import type { ContainerProviderConnectionInfo } from '@shared/models/IContainerConnectionInfo'; import { join } from 'node:path'; vi.mock('@podman-desktop/api', () => ({ containerEngine: { createContainer: vi.fn(), }, })); vi.mock('../../utils/inferenceUtils', () => ({ getProviderContainerConnection: vi.fn(), getImageInfo: vi.fn(), LABEL_INFERENCE_SERVER: 'ai-lab-inference-server', })); const taskRegistry: TaskRegistry = { createTask: vi.fn(), updateTask: vi.fn(), } as unknown as TaskRegistry; const gpuManager: GPUManager = { collectGPUs: vi.fn(), } as unknown as GPUManager; const DummyModel: ModelInfo = { name: 'dummy model', id: 'dummy-model-id', file: { file: 'dummy-file.guff', path: 'dummy-path', }, properties: {}, description: 'dummy-desc', }; const dummyConnection: ContainerProviderConnection = { name: 'dummy-provider-connection', type: 'podman', vmType: VMType.WSL, status: () => 
'started', endpoint: { socketPath: 'dummy-socket', }, }; const DummyImageInfo: ImageInfo = { Id: 'dummy-image-id', engineId: 'dummy-engine-id', } as unknown as ImageInfo; const podmanConnection: PodmanConnection = { findRunningContainerProviderConnection: vi.fn(), getContainerProviderConnection: vi.fn(), } as unknown as PodmanConnection; const configurationRegistry: ConfigurationRegistry = { getExtensionConfiguration: vi.fn(), } as unknown as ConfigurationRegistry; beforeEach(() => { vi.resetAllMocks(); vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ experimentalGPU: false, modelsPath: 'model-path', apiPort: 10434, inferenceRuntime: 'llama-cpp', experimentalTuning: false, modelUploadDisabled: false, showGPUPromotion: false, appearance: 'dark', }); vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue(dummyConnection); vi.mocked(podmanConnection.getContainerProviderConnection).mockReturnValue(dummyConnection); vi.mocked(getImageInfo).mockResolvedValue(DummyImageInfo); vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'dummy-task-id', name: '', labels: {}, state: 'loading' }); vi.mocked(containerEngine.createContainer).mockResolvedValue({ id: 'dummy-container-id', engineId: 'dummy-engine-id', }); }); test('LlamaCppPython being the default, it should always be enable', () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); expect(provider.enabled()).toBeTruthy(); }); describe('perform', () => { test('config without image should use defined image', async () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); await provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [DummyModel], connection: undefined, }); expect(podmanConnection.findRunningContainerProviderConnection).toHaveBeenCalled(); expect(getImageInfo).toHaveBeenCalledWith(dummyConnection, llamacpp.default, 
expect.anything()); }); test('config without models should throw an error', async () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); await expect( provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [], connection: undefined, }), ).rejects.toThrowError('Need at least one model info to start an inference server.'); }); test('config model without file should throw an error', async () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); await expect( provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [ { id: 'invalid', } as unknown as ModelInfo, ], connection: undefined, }), ).rejects.toThrowError('The model info file provided is undefined'); }); test('valid config should produce expected CreateContainerOptions', async () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); const server = await provider.perform({ port: 8888, image: undefined, labels: {}, modelsInfo: [DummyModel], connection: undefined, }); expect(server).toStrictEqual({ container: { containerId: 'dummy-container-id', engineId: DummyImageInfo.engineId, }, labels: { [LABEL_INFERENCE_SERVER]: `["${DummyModel.id}"]`, api: 'http://localhost:8888/v1', docs: 'http://localhost:10434/api-docs/8888', }, models: [DummyModel], status: 'running', type: InferenceType.LLAMA_CPP, connection: { port: 8888, }, }); expect(containerEngine.createContainer).toHaveBeenCalledWith(DummyImageInfo.engineId, { Cmd: [], Detach: true, Env: ['MODEL_PATH=/models/dummy-file.guff', 'HOST=0.0.0.0', 'PORT=8000'], ExposedPorts: { '8888': {}, }, HealthCheck: { Interval: SECOND * 5, Retries: 20, Test: ['CMD-SHELL', 'curl -sSf localhost:8000 > /dev/null'], }, HostConfig: { AutoRemove: false, Mounts: [ { Source: join('dummy-path', 'dummy-file.guff'), Target: '/models/dummy-file.guff', Type: 'bind', }, ], DeviceRequests: [], 
Devices: [], PortBindings: { '8000/tcp': [ { HostPort: '8888', }, ], }, SecurityOpt: ['label=disable'], }, Image: DummyImageInfo.Id, Labels: { [LABEL_INFERENCE_SERVER]: `["${DummyModel.id}"]`, api: 'http://localhost:8888/v1', docs: 'http://localhost:10434/api-docs/8888', }, }); }); test('model properties should be made uppercased', async () => { const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); await provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [ { ...DummyModel, properties: { basicProp: 'basicProp', lotOfCamelCases: 'lotOfCamelCases', lowercase: 'lowercase', chatFormat: 'dummyChatFormat', }, }, ], connection: undefined, }); expect(containerEngine.createContainer).toHaveBeenCalledWith(DummyImageInfo.engineId, { Env: expect.arrayContaining([ 'MODEL_BASIC_PROP=basicProp', 'MODEL_LOT_OF_CAMEL_CASES=lotOfCamelCases', 'MODEL_LOWERCASE=lowercase', 'MODEL_CHAT_FORMAT=dummyChatFormat', ]), Cmd: expect.anything(), HealthCheck: expect.anything(), HostConfig: expect.anything(), ExposedPorts: expect.anything(), Labels: expect.anything(), Image: DummyImageInfo.Id, Detach: true, }); }); test('gpu experimental should collect GPU data', async () => { vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ experimentalGPU: true, modelsPath: '', apiPort: 10434, inferenceRuntime: 'llama-cpp', experimentalTuning: false, modelUploadDisabled: false, showGPUPromotion: false, appearance: 'dark', }); vi.mocked(gpuManager.collectGPUs).mockResolvedValue([ { vram: 1024, model: 'nvidia', vendor: GPUVendor.NVIDIA, }, ]); const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); const server = await provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [DummyModel], connection: undefined, }); expect(containerEngine.createContainer).toHaveBeenCalledWith( DummyImageInfo.engineId, expect.objectContaining({ Cmd: [ '-c', '/usr/bin/ln 
-sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && /usr/bin/llama-server.sh', ], }), ); expect(gpuManager.collectGPUs).toHaveBeenCalled(); expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function)); expect('gpu' in server.labels).toBeTruthy(); expect(server.labels['gpu']).toBe('nvidia'); }); test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => { vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ experimentalGPU: true, modelsPath: '', apiPort: 10434, inferenceRuntime: 'llama-cpp', experimentalTuning: false, modelUploadDisabled: false, showGPUPromotion: false, appearance: 'dark', }); vi.mocked(gpuManager.collectGPUs).mockResolvedValue([ { vram: 1024, model: 'dummy-model', vendor: GPUVendor.UNKNOWN, }, { vram: 1024, model: 'nvidia', vendor: GPUVendor.NVIDIA, }, ]); const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry); const server = await provider.perform({ port: 8000, image: undefined, labels: {}, modelsInfo: [DummyModel], connection: undefined, }); expect(containerEngine.createContainer).toHaveBeenCalledWith( DummyImageInfo.engineId, expect.objectContaining({ Cmd: [ '-c', '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && /usr/bin/llama-server.sh', ], }), ); expect(gpuManager.collectGPUs).toHaveBeenCalled(); expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function)); expect('gpu' in server.labels).toBeTruthy(); expect(server.labels['gpu']).toBe('nvidia'); }); test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => { vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({ experimentalGPU: true, modelsPath: '', apiPort: 10434, inferenceRuntime: 'llama-cpp', experimentalTuning: false, modelUploadDisabled: false, 
// (continuation of the preceding WSL/NVIDIA multi-GPU test — its opening lines are above this chunk)
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'nvidia',
      vendor: GPUVendor.NVIDIA,
    },
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.UNKNOWN,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  // On WSL + NVIDIA the container command links the WSL GPU libraries before starting llama-server
  expect(containerEngine.createContainer).toHaveBeenCalledWith(
    DummyImageInfo.engineId,
    expect.objectContaining({
      Cmd: [
        '-c',
        '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && /usr/bin/llama-server.sh',
      ],
    }),
  );
  expect(gpuManager.collectGPUs).toHaveBeenCalled();
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
  // the known (NVIDIA) GPU is preferred over the UNKNOWN one when labelling the server
  expect('gpu' in server.labels).toBeTruthy();
  expect(server.labels['gpu']).toBe('nvidia');
});

test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.UNKNOWN,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(gpuManager.collectGPUs).toHaveBeenCalled();
  // UNKNOWN vendor on an unsupported vmtype: no 'gpu' label is attached
  expect('gpu' in server.labels).toBeFalsy();
});

test('LIBKRUN vmtype should uses llamacpp.default image with gpu layers 999', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.LIBKRUN,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.APPLE,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.default, expect.any(Function));
  expect(gpuManager.collectGPUs).toHaveBeenCalled();
  expect('gpu' in server.labels).toBeTruthy();
  // Apple GPU on libkrun: all layers are offloaded (GPU_LAYERS=999)
  expect(containerEngine.createContainer).toHaveBeenCalledWith(
    DummyImageInfo.engineId,
    expect.objectContaining({
      Env: expect.arrayContaining(['GPU_LAYERS=999']),
    }),
  );
});

test('UNKNOWN vmtype should use llamacpp.default image - if not gpu accelerated', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.UNKNOWN,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.UNKNOWN,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.default, expect.any(Function));
  expect(gpuManager.collectGPUs).toHaveBeenCalled();
  expect('gpu' in server.labels).toBeFalsy();
});

test('UNKNOWN vmtype should use llamacpp.cuda image - if gpu accelerated and cdi configured', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.UNKNOWN,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.NVIDIA,
    },
  ]);
  // subclass overrides the CDI detection so the test does not depend on host files
  class CDILlamaCppPython extends LlamaCppPython {
    override isNvidiaCDIConfigured(): boolean {
      return true;
    }
  }
  const provider = new CDILlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
  expect('gpu' in server.labels).toBeTruthy();
});

test('WSL vmtype with Intel GPU should use llamacpp.intel image and no custom entrypoint', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.WSL,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'intel-gpu',
      vendor: GPUVendor.INTEL,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.intel, expect.any(Function));
  // Intel path keeps the image's own entrypoint and only injects the SYSMAN env
  expect(containerEngine.createContainer).toHaveBeenCalledWith(
    DummyImageInfo.engineId,
    expect.objectContaining({
      Entrypoint: undefined,
      Cmd: [],
      Env: expect.arrayContaining(['ZES_ENABLE_SYSMAN=1']),
    }),
  );
});

test('UNKNOWN vmtype with Intel GPU should use llamacpp.intel image and no custom entrypoint', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.UNKNOWN,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'intel-gpu',
      vendor: GPUVendor.INTEL,
    },
  ]);
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.intel, expect.any(Function));
  // native Linux Intel path: /dev/dri device passthrough, root user, no DeviceRequests
  expect(containerEngine.createContainer).toHaveBeenCalledWith(
    DummyImageInfo.engineId,
    expect.objectContaining({
      Entrypoint: undefined,
      User: '0',
      Cmd: [],
      HostConfig: expect.objectContaining({
        DeviceRequests: [],
        Devices: expect.arrayContaining([
          expect.objectContaining({
            PathOnHost: '/dev/dri',
          }),
        ]),
      }),
    }),
  );
});

test('UNKNOWN vmtype should use llamacpp.default image - if gpu but cdi not configured', async () => {
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
    ...dummyConnection,
    vmType: VMType.UNKNOWN,
  });
  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: true,
    modelsPath: '',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
    {
      vram: 1024,
      model: 'dummy-model',
      vendor: GPUVendor.NVIDIA,
    },
  ]);
  // subclass forces "CDI not configured" so the test does not depend on host files
  class NoCDILlamaCppPython extends LlamaCppPython {
    override isNvidiaCDIConfigured(): boolean {
      return false;
    }
  }
  const provider = new NoCDILlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  const server = await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: undefined,
  });
  expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.default, expect.any(Function));
  expect(gpuManager.collectGPUs).toHaveBeenCalled();
  expect('gpu' in server.labels).toBeFalsy();
});

test('provided connection should be used for pulling the image', async () => {
  const connection: ContainerProviderConnectionInfo = {
    name: 'Dummy Podman',
    type: 'podman',
    vmType: VMType.WSL,
    status: 'started',
    providerId: 'fakeProviderId',
  };
  const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
  await provider.perform({
    port: 8000,
    image: undefined,
    labels: {},
    modelsInfo: [DummyModel],
    connection: connection,
  });
  // an explicit connection bypasses the "find any running connection" lookup
  expect(podmanConnection.getContainerProviderConnection).toHaveBeenCalledWith(connection);
  expect(podmanConnection.findRunningContainerProviderConnection).not.toHaveBeenCalled();
  expect(getImageInfo).toHaveBeenCalledWith(dummyConnection, llamacpp.default, expect.anything());
});
});
================================================
FILE: packages/backend/src/workers/provider/LlamaCppPython.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerCreateOptions, ContainerProviderConnection, DeviceRequest, ImageInfo, MountConfig, } from '@podman-desktop/api'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import { InferenceProvider } from './InferenceProvider'; import { getModelPropertiesForEnvironment, getMountPath } from '../../utils/modelsUtils'; import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils'; import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { type InferenceServer, InferenceType } from '@shared/models/IInference'; import type { GPUManager } from '../../managers/GPUManager'; import { GPUVendor, type IGPUInfo } from '@shared/models/IGPUInfo'; import { VMType } from '@shared/models/IPodman'; import type { PodmanConnection } from '../../managers/podmanConnection'; import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry'; import { llamacpp } from '../../assets/inference-images.json'; import * as fs from 'node:fs'; export const SECOND: number = 1_000_000_000; interface Device { PathOnHost: string; PathInContainer: string; CgroupPermissions: string; } export class LlamaCppPython extends InferenceProvider { constructor( taskRegistry: TaskRegistry, private 
podmanConnection: PodmanConnection, private gpuManager: GPUManager, private configurationRegistry: ConfigurationRegistry, ) { super(taskRegistry, InferenceType.LLAMA_CPP, 'LLama-cpp'); } dispose(): void {} public enabled = (): boolean => true; protected async getContainerCreateOptions( config: InferenceServerConfig, imageInfo: ImageInfo, vmType: VMType, gpu?: IGPUInfo, ): Promise { if (config.modelsInfo.length === 0) throw new Error('Need at least one model info to start an inference server.'); if (config.modelsInfo.length > 1) { throw new Error('Currently the inference server does not support multiple models serving.'); } const modelInfo = config.modelsInfo[0]; if (modelInfo.file === undefined) { throw new Error('The model info file provided is undefined'); } const labels: Record = { ...config.labels, [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)), }; // get model mount settings const filename = getMountPath(modelInfo); const target = `/models/${modelInfo.file.file}`; // mount the file directory to avoid adding other files to the containers const mounts: MountConfig = [ { Target: target, Source: filename, Type: 'bind', }, ]; // provide envs const envs: string[] = [`MODEL_PATH=${target}`, 'HOST=0.0.0.0', 'PORT=8000']; envs.push(...getModelPropertiesForEnvironment(modelInfo)); const deviceRequests: DeviceRequest[] = []; const devices: Device[] = []; let entrypoint: string | undefined = undefined; let cmd: string[] = []; let user: string | undefined = undefined; if (gpu) { let supported: boolean = false; switch (vmType) { case VMType.WSL: if (gpu.vendor === GPUVendor.NVIDIA) { supported = true; mounts.push({ Target: '/usr/lib/wsl', Source: '/usr/lib/wsl', Type: 'bind', }); devices.push({ PathOnHost: '/dev/dxg', PathInContainer: '/dev/dxg', CgroupPermissions: 'r', }); user = '0'; entrypoint = '/usr/bin/sh'; cmd = [ '-c', '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && 
/usr/bin/llama-server.sh', ]; } else if (gpu.vendor === GPUVendor.INTEL) { supported = true; mounts.push({ Target: '/usr/lib/wsl', Source: '/usr/lib/wsl', Type: 'bind', }); devices.push({ PathOnHost: '/dev/dxg', PathInContainer: '/dev/dxg', CgroupPermissions: 'r', }); user = '0'; } break; case VMType.LIBKRUN: case VMType.LIBKRUN_LABEL: supported = true; devices.push({ PathOnHost: '/dev/dri', PathInContainer: '/dev/dri', CgroupPermissions: '', }); break; case VMType.UNKNOWN: // This is linux with podman locally installed // Linux GPU support currently requires NVIDIA GPU with CDI configured if (this.isNvidiaCDIConfigured(gpu)) { supported = true; devices.push({ PathOnHost: 'nvidia.com/gpu=all', PathInContainer: '', CgroupPermissions: '', }); user = '0'; } else if (gpu.vendor === GPUVendor.INTEL) { // Intel GPU support via /dev/dri device passthrough supported = true; devices.push({ PathOnHost: '/dev/dri', PathInContainer: '/dev/dri', CgroupPermissions: 'rwm', }); user = '0'; } break; } // adding gpu capabilities in supported architectures if (supported) { if (gpu.vendor !== GPUVendor.INTEL || vmType !== VMType.UNKNOWN) { deviceRequests.push({ Capabilities: [['gpu']], Count: -1, // -1: all }); } // label the container labels['gpu'] = gpu.model; envs.push(`GPU_LAYERS=${config.gpuLayers ?? 
999}`); // Add Intel-specific environment variables if (gpu.vendor === GPUVendor.INTEL) { envs.push('ZES_ENABLE_SYSMAN=1'); // Add the library path for the Unified Memory Framework (UMF) which is required for the Level Zero adapter // This is a workaround for the missing LD_LIBRARY_PATH in the ramalama image envs.push('LD_LIBRARY_PATH=/opt/intel/oneapi/umf/0.11/lib/'); } } else { console.warn(`gpu ${gpu.model} is not supported on ${vmType}.`); } } // add the link to our openAPI instance using the instance as the host const aiLabPort = this.configurationRegistry.getExtensionConfiguration().apiPort; // add in the URL the port of the inference server const aiLabDocsLink = `http://localhost:${aiLabPort}/api-docs/${config.port}`; // adding labels to inference server labels['docs'] = aiLabDocsLink; labels['api'] = `http://localhost:${config.port}/v1`; return { Image: imageInfo.Id, Detach: true, Entrypoint: entrypoint, User: user, ExposedPorts: { [`${config.port}`]: {} }, HostConfig: { AutoRemove: false, Devices: devices, Mounts: mounts, DeviceRequests: deviceRequests, SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION], PortBindings: { '8000/tcp': [ { HostPort: `${config.port}`, }, ], }, }, HealthCheck: { // must be the port INSIDE the container not the exposed one Test: ['CMD-SHELL', `curl -sSf localhost:8000 > /dev/null`], Interval: SECOND * 5, Retries: 4 * 5, }, Labels: labels, Env: envs, Cmd: cmd, }; } async perform(config: InferenceServerConfig): Promise { if (!this.enabled()) throw new Error('not enabled'); let gpu: IGPUInfo | undefined = undefined; // get the first GPU if option is enabled if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) { const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs(); if (gpus.length === 0) throw new Error('no gpu was found.'); // Look for a GPU that is of a known type, use the first one found. // Fall back to the first one if no GPUs are of known type. 
gpu = gpus.find(({ vendor }) => vendor !== GPUVendor.UNKNOWN) ?? gpus[0]; } let connection: ContainerProviderConnection | undefined = undefined; if (config.connection) { connection = this.podmanConnection.getContainerProviderConnection(config.connection); } else { connection = this.podmanConnection.findRunningContainerProviderConnection(); } if (!connection) throw new Error('no running connection could be found'); const vmType: VMType = (connection.vmType ?? VMType.UNKNOWN) as VMType; // pull the image const imageInfo: ImageInfo = await this.pullImage( connection, config.image ?? this.getLlamaCppInferenceImage(vmType, gpu), config.labels, ); // Get the container creation options const containerCreateOptions: ContainerCreateOptions = await this.getContainerCreateOptions( config, imageInfo, vmType, gpu, ); // Create the container const { engineId, id } = await this.createContainer(imageInfo.engineId, containerCreateOptions, config.labels); return { container: { engineId: engineId, containerId: id, }, connection: { port: config.port, }, status: 'running', models: config.modelsInfo, type: InferenceType.LLAMA_CPP, labels: containerCreateOptions.Labels ?? {}, }; } protected getLlamaCppInferenceImage(vmType: VMType, gpu?: IGPUInfo): string { switch (vmType) { case VMType.WSL: if (gpu?.vendor === GPUVendor.NVIDIA) return llamacpp.cuda; if (gpu?.vendor === GPUVendor.INTEL) return llamacpp.intel; return llamacpp.default; case VMType.LIBKRUN: case VMType.LIBKRUN_LABEL: return llamacpp.default; // no GPU support case VMType.UNKNOWN: if (this.isNvidiaCDIConfigured(gpu)) return llamacpp.cuda; if (gpu?.vendor === GPUVendor.INTEL) return llamacpp.intel; return llamacpp.default; default: return llamacpp.default; } } protected isNvidiaCDIConfigured(gpu?: IGPUInfo): boolean { // NVIDIA cdi must be set up to use GPU acceleration on Linux. 
// Check the known locations for the configuration file const knownLocations = [ '/etc/cdi/nvidia.yaml', // Fedora ]; if (gpu?.vendor !== GPUVendor.NVIDIA) return false; let cdiSetup = false; for (const location of knownLocations) { if (fs.existsSync(location)) { cdiSetup = true; break; } } return cdiSetup; } } ================================================ FILE: packages/backend/src/workers/provider/OpenVINO.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { getImageInfo, LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import type { ContainerProviderConnection, ImageInfo } from '@podman-desktop/api';
import { containerEngine } from '@podman-desktop/api';
import type { PodmanConnection } from '../../managers/podmanConnection';
import { VMType } from '@shared/models/IPodman';
import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry';
import type { InferenceServer } from '@shared/models/IInference';
import { InferenceType } from '@shared/models/IInference';
import { openvino } from '../../assets/inference-images.json';
import type { ContainerProviderConnectionInfo } from '@shared/models/IContainerConnectionInfo';
import { OpenVINO, SECOND } from './OpenVINO';
import type { ModelsManager } from '../../managers/modelsManager';

vi.mock('@podman-desktop/api', () => ({
  containerEngine: {
    createContainer: vi.fn(),
  },
}));

vi.mock('../../utils/inferenceUtils', () => ({
  getProviderContainerConnection: vi.fn(),
  getImageInfo: vi.fn(),
  LABEL_INFERENCE_SERVER: 'ai-lab-inference-server',
}));

const taskRegistry: TaskRegistry = {
  createTask: vi.fn(),
  updateTask: vi.fn(),
} as unknown as TaskRegistry;

// model whose file path ends with a HuggingFace-style snapshots/<revision> suffix
const DummyModel: ModelInfo = {
  name: 'dummy model',
  id: 'dummy-model-id',
  file: {
    file: '',
    path: 'dummy-path/snapshots/032c17573f64eacffe8514e7ee47cc0e532ed9a2',
  },
  properties: {},
  description: 'dummy-desc',
};

const dummyConnection: ContainerProviderConnection = {
  name: 'dummy-provider-connection',
  type: 'podman',
  vmType: VMType.WSL,
  status: () => 'started',
  endpoint: {
    socketPath: 'dummy-socket',
  },
};

const DummyImageInfo: ImageInfo = {
  Id: 'dummy-image-id',
  engineId: 'dummy-engine-id',
} as unknown as ImageInfo;

const podmanConnection: PodmanConnection = {
  findRunningContainerProviderConnection: vi.fn(),
  getContainerProviderConnection: vi.fn(),
} as unknown as PodmanConnection;

const configurationRegistry: ConfigurationRegistry = {
  getExtensionConfiguration: vi.fn(),
} as unknown as ConfigurationRegistry;

const modelsManager: ModelsManager = {
  getModelInfo: vi.fn(),
} as unknown as ModelsManager;

beforeEach(() => {
  vi.resetAllMocks();

  vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
    experimentalGPU: false,
    modelsPath: 'model-path',
    apiPort: 10434,
    inferenceRuntime: 'llama-cpp',
    experimentalTuning: false,
    modelUploadDisabled: false,
    showGPUPromotion: false,
    appearance: 'dark',
  });
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue(dummyConnection);
  vi.mocked(podmanConnection.getContainerProviderConnection).mockReturnValue(dummyConnection);
  vi.mocked(getImageInfo).mockResolvedValue(DummyImageInfo);
  vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'dummy-task-id', name: '', labels: {}, state: 'loading' });
  vi.mocked(containerEngine.createContainer).mockResolvedValue({
    id: 'dummy-container-id',
    engineId: 'dummy-engine-id',
  });
});

test('OpenVINO being the default, it should always be enable', () => {
  const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
  expect(provider.enabled()).toBeTruthy();
});

describe('perform', () => {
  test('config without image should use defined image', async () => {
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    await provider.perform({
      port: 8000,
      image: undefined,
      labels: {},
      modelsInfo: [DummyModel],
      connection: undefined,
    });

    expect(podmanConnection.findRunningContainerProviderConnection).toHaveBeenCalled();
    expect(getImageInfo).toHaveBeenCalledWith(dummyConnection, openvino.default, expect.anything());
  });

  test('config without models should throw an error', async () => {
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    await expect(
      provider.perform({
        port: 8000,
        image: undefined,
        labels: {},
        modelsInfo: [],
        connection: undefined,
      }),
    ).rejects.toThrowError('Need at least one model info to start an inference server.');
  });

  test('config model without file should throw an error', async () => {
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    await expect(
      provider.perform({
        port: 8000,
        image: undefined,
        labels: {},
        modelsInfo: [
          {
            id: 'invalid',
          } as unknown as ModelInfo,
        ],
        connection: undefined,
      }),
    ).rejects.toThrowError('The model info file provided is undefined');
  });

  test('valid config should produce expected CreateContainerOptions', async () => {
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    vi.mocked(modelsManager.getModelInfo).mockReturnValue(DummyModel);
    const server = await provider.perform({
      port: 8888,
      image: undefined,
      labels: {},
      modelsInfo: [DummyModel],
      connection: undefined,
    });

    expect(server).toStrictEqual({
      container: {
        containerId: 'dummy-container-id',
        engineId: DummyImageInfo.engineId,
      },
      labels: {
        [LABEL_INFERENCE_SERVER]: `["${DummyModel.id}"]`,
        api: 'http://localhost:8888/v3',
        docs: 'http://localhost:10434/api-docs/8888',
      },
      models: [DummyModel],
      status: 'running',
      type: InferenceType.OPENVINO,
      connection: {
        port: 8888,
      },
    });
    // the config path inside the container includes the snapshots/<revision> suffix of the model path
    expect(containerEngine.createContainer).toHaveBeenCalledWith(DummyImageInfo.engineId, {
      Cmd: [
        'ovms',
        '--rest_port',
        '8000',
        '--config_path',
        '/model/snapshots/032c17573f64eacffe8514e7ee47cc0e532ed9a2/config-all.json',
        '--metrics_enable',
      ],
      Detach: true,
      Env: ['MODEL_PATH=/model', 'HOST=0.0.0.0', 'PORT=8000'],
      ExposedPorts: {
        '8888': {},
      },
      HealthCheck: {
        Interval: SECOND * 5,
        Retries: 20,
        Test: ['CMD-SHELL', 'curl -sSf localhost:8000/metrics > /dev/null'],
      },
      HostConfig: {
        AutoRemove: false,
        Mounts: [
          {
            Source: 'dummy-path',
            Target: '/model',
            Type: 'bind',
          },
        ],
        PortBindings: {
          '8000/tcp': [
            {
              HostPort: '8888',
            },
          ],
        },
        SecurityOpt: ['label=disable'],
      },
      Image: DummyImageInfo.Id,
      Labels: {
        [LABEL_INFERENCE_SERVER]: `["${DummyModel.id}"]`,
        api: 'http://localhost:8888/v3',
        docs: 'http://localhost:10434/api-docs/8888',
      },
    });
  });

  test('model properties should be made uppercased', async () => {
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    await provider.perform({
      port: 8000,
      image: undefined,
      labels: {},
      modelsInfo: [
        {
          ...DummyModel,
          properties: {
            basicProp: 'basicProp',
            lotOfCamelCases: 'lotOfCamelCases',
            lowercase: 'lowercase',
            chatFormat: 'dummyChatFormat',
          },
        },
      ],
      connection: undefined,
    });

    // camelCase property names become MODEL_SNAKE_UPPER_CASE env variables
    expect(containerEngine.createContainer).toHaveBeenCalledWith(DummyImageInfo.engineId, {
      Env: expect.arrayContaining([
        'MODEL_BASIC_PROP=basicProp',
        'MODEL_LOT_OF_CAMEL_CASES=lotOfCamelCases',
        'MODEL_LOWERCASE=lowercase',
        'MODEL_CHAT_FORMAT=dummyChatFormat',
      ]),
      Cmd: expect.anything(),
      HealthCheck: expect.anything(),
      HostConfig: expect.anything(),
      ExposedPorts: expect.anything(),
      Labels: expect.anything(),
      Image: DummyImageInfo.Id,
      Detach: true,
    });
  });

  test('provided connection should be used for pulling the image', async () => {
    const connection: ContainerProviderConnectionInfo = {
      name: 'Dummy Podman',
      type: 'podman',
      vmType: VMType.WSL,
      status: 'started',
      providerId: 'fakeProviderId',
    };
    const provider = new OpenVINO(taskRegistry, podmanConnection, modelsManager, configurationRegistry);
    await provider.perform({
      port: 8000,
      image: undefined,
      labels: {},
      modelsInfo: [DummyModel],
      connection: connection,
    });

    expect(podmanConnection.getContainerProviderConnection).toHaveBeenCalledWith(connection);
    expect(podmanConnection.findRunningContainerProviderConnection).not.toHaveBeenCalled();
    expect(getImageInfo).toHaveBeenCalledWith(dummyConnection, openvino.default, expect.anything());
  });
});
================================================ FILE: packages/backend/src/workers/provider/OpenVINO.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ContainerCreateOptions, ContainerProviderConnection, ImageInfo, MountConfig } from '@podman-desktop/api'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import { InferenceProvider } from './InferenceProvider'; import { getHuggingFaceModelMountInfo, getModelPropertiesForEnvironment } from '../../utils/modelsUtils'; import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils'; import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { type InferenceServer, InferenceType } from '@shared/models/IInference'; import { VMType } from '@shared/models/IPodman'; import type { PodmanConnection } from '../../managers/podmanConnection'; import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry'; import { openvino } from '../../assets/inference-images.json'; import { existsSync } from 'node:fs'; import { writeFile } from 'node:fs/promises'; import type { ModelInfo } from '@shared/models/IModelInfo'; import type 
{ ModelsManager } from '../../managers/modelsManager'; export const SECOND: number = 1_000_000_000; const CONFIG_FILE_NAME = `config-all.json`; const GRAPH_CONTENT = `input_stream: "HTTP_REQUEST_PAYLOAD:input" output_stream: "HTTP_RESPONSE_PAYLOAD:output" node: { name: "LLMExecutor" calculator: "HttpLLMCalculator" input_stream: "LOOPBACK:loopback" input_stream: "HTTP_REQUEST_PAYLOAD:input" input_side_packet: "LLM_NODE_RESOURCES:llm" output_stream: "LOOPBACK:loopback" output_stream: "HTTP_RESPONSE_PAYLOAD:output" input_stream_info: { tag_index: 'LOOPBACK:0', back_edge: true } node_options: { [type.googleapis.com / mediapipe.LLMCalculatorOptions]: { models_path: "./", plugin_config: '{ "KV_CACHE_PRECISION": "u8"}', enable_prefix_caching: false, cache_size: 10, max_num_seqs: 256, device: "CPU", } } input_stream_handler { input_stream_handler: "SyncSetInputStreamHandler", options { [mediapipe.SyncSetInputStreamHandlerOptions.ext] { sync_set { tag_index: "LOOPBACK:0" } } } } }`; export class OpenVINO extends InferenceProvider { constructor( taskRegistry: TaskRegistry, private podmanConnection: PodmanConnection, private modelsManager: ModelsManager, private configurationRegistry: ConfigurationRegistry, ) { super(taskRegistry, InferenceType.OPENVINO, 'OpenVINO'); } dispose(): void {} public enabled = (): boolean => true; protected async getContainerCreateOptions( config: InferenceServerConfig, imageInfo: ImageInfo, modelInfo: ModelInfo, ): Promise { const labels: Record = { ...config.labels, [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)), }; // get model mount settings const mountInfo = getHuggingFaceModelMountInfo(modelInfo); const target = `/model`; // mount the file directory to avoid adding other files to the containers const mounts: MountConfig = [ { Target: target, Source: mountInfo.mount, Type: 'bind', }, ]; const configFilePath = mountInfo.suffix ? 
`/model/${mountInfo.suffix}/${CONFIG_FILE_NAME}` : `/model/${CONFIG_FILE_NAME}`; // provide envs const envs: string[] = [`MODEL_PATH=${target}`, 'HOST=0.0.0.0', 'PORT=8000']; envs.push(...getModelPropertiesForEnvironment(modelInfo)); const cmd: string[] = ['ovms', '--rest_port', '8000', '--config_path', configFilePath, '--metrics_enable']; // add the link to our openAPI instance using the instance as the host const aiLabPort = this.configurationRegistry.getExtensionConfiguration().apiPort; // add in the URL the port of the inference server const aiLabDocsLink = `http://localhost:${aiLabPort}/api-docs/${config.port}`; // adding labels to inference server labels['docs'] = aiLabDocsLink; labels['api'] = `http://localhost:${config.port}/v3`; return { Image: imageInfo.Id, Detach: true, ExposedPorts: { [`${config.port}`]: {} }, HostConfig: { AutoRemove: false, Mounts: mounts, SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION], PortBindings: { '8000/tcp': [ { HostPort: `${config.port}`, }, ], }, }, HealthCheck: { // must be the port INSIDE the container not the exposed one Test: ['CMD-SHELL', `curl -sSf localhost:8000/metrics > /dev/null`], Interval: SECOND * 5, Retries: 4 * 5, }, Labels: labels, Env: envs, Cmd: cmd, }; } override async prePerform(config: InferenceServerConfig): Promise { const modelInfo = this.validateAndGetModelInfo(config); if (modelInfo.file === undefined) { throw new Error('The model info file provided is undefined'); } await this.ensureGraphFile(modelInfo.file.path); await this.ensureConfigFile(modelInfo); } async perform(config: InferenceServerConfig): Promise { const modelInfo = this.validateAndGetModelInfo(config); if (modelInfo.file === undefined) { throw new Error('The model info file provided is undefined'); } let connection: ContainerProviderConnection | undefined = undefined; if (config.connection) { connection = this.podmanConnection.getContainerProviderConnection(config.connection); } else { connection = 
this.podmanConnection.findRunningContainerProviderConnection(); } if (!connection) throw new Error('no running connection could be found'); const vmType: VMType = (connection.vmType ?? VMType.UNKNOWN) as VMType; // pull the image const imageInfo: ImageInfo = await this.pullImage( connection, config.image ?? this.getOpenVINOInferenceImage(vmType), config.labels, ); // Get the container creation options const containerCreateOptions: ContainerCreateOptions = await this.getContainerCreateOptions( config, imageInfo, modelInfo, ); // Create the container const { engineId, id } = await this.createContainer(imageInfo.engineId, containerCreateOptions, config.labels); return { container: { engineId: engineId, containerId: id, }, connection: { port: config.port, }, status: 'running', models: config.modelsInfo.map(model => this.modelsManager.getModelInfo(model.id)), type: InferenceType.OPENVINO, labels: containerCreateOptions.Labels ?? {}, }; } private validateAndGetModelInfo(config: InferenceServerConfig): ModelInfo { if (!this.enabled()) throw new Error('not enabled'); if (config.modelsInfo.length === 0) throw new Error('Need at least one model info to start an inference server.'); if (config.modelsInfo.length > 1) { throw new Error('Currently the inference server does not support multiple models serving.'); } return config.modelsInfo[0]; } private async ensureGraphFile(modelFolder: string): Promise { // check if the file exists const graphFile = `${modelFolder}/graph.pbtxt`; // check if the graph file exists if (!existsSync(graphFile)) { // create the graph file await writeFile(graphFile, GRAPH_CONTENT); } return graphFile; } private async ensureConfigFile(modelInfo: ModelInfo): Promise { const configFile = `${modelInfo.file?.path}/${CONFIG_FILE_NAME}`; if (!existsSync(configFile)) { const config = { mediapipe_config_list: [ { name: modelInfo.name, base_path: '.', }, ], model_config_list: [], }; await writeFile(configFile, JSON.stringify(config)); } return configFile; } 
protected getOpenVINOInferenceImage(_vmType: VMType): string { return openvino.default; } } ================================================ FILE: packages/backend/src/workers/provider/WhisperCpp.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { vi, test, expect, beforeEach } from 'vitest'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import { WhisperCpp } from './WhisperCpp'; import type { InferenceServer } from '@shared/models/IInference'; import { InferenceType } from '@shared/models/IInference'; import type { ContainerProviderConnection, ImageInfo } from '@podman-desktop/api'; import { containerEngine } from '@podman-desktop/api'; import { getImageInfo } from '../../utils/inferenceUtils'; import type { PodmanConnection } from '../../managers/podmanConnection'; import type { ContainerProviderConnectionInfo } from '@shared/models/IContainerConnectionInfo'; import { VMType } from '@shared/models/IPodman'; import { join } from 'node:path'; vi.mock('@podman-desktop/api', () => ({ containerEngine: { createContainer: vi.fn(), }, })); vi.mock('../../utils/inferenceUtils', () => ({ getProviderContainerConnection: vi.fn(), getImageInfo: vi.fn(), LABEL_INFERENCE_SERVER: 
'ai-lab-inference-server',
}));

// Minimal fakes for every collaborator WhisperCpp touches: a provider
// connection, an image, a task registry and the PodmanConnection manager.
const connectionMock: ContainerProviderConnection = {
  name: 'dummy-provider-connection',
  type: 'podman',
} as unknown as ContainerProviderConnection;

const DummyImageInfo: ImageInfo = {
  Id: 'dummy-image-id',
  engineId: 'dummy-engine-id',
} as unknown as ImageInfo;

const taskRegistry: TaskRegistry = {
  createTask: vi.fn(),
  updateTask: vi.fn(),
} as unknown as TaskRegistry;

const podmanConnection: PodmanConnection = {
  findRunningContainerProviderConnection: vi.fn(),
  getContainerProviderConnection: vi.fn(),
} as unknown as PodmanConnection;

// Reset all mocks and re-install the happy-path behaviours before each test.
beforeEach(() => {
  vi.resetAllMocks();
  vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue(connectionMock);
  vi.mocked(podmanConnection.getContainerProviderConnection).mockReturnValue(connectionMock);
  vi.mocked(taskRegistry.createTask).mockReturnValue({ id: 'dummy-task-id', name: '', labels: {}, state: 'loading' });
  vi.mocked(getImageInfo).mockResolvedValue(DummyImageInfo);
  vi.mocked(containerEngine.createContainer).mockResolvedValue({
    id: 'dummy-container-id',
    engineId: 'dummy-engine-id',
  });
});

// perform() must reject when modelsInfo is empty.
test('provider requires at least one model', async () => {
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  await expect(() => {
    return provider.perform({
      port: 8888,
      labels: {},
      modelsInfo: [],
    });
  }).rejects.toThrowError('Need at least one model info to start an inference server.');
});

// perform() must reject when the model has no downloaded file.
test('provider requires a downloaded model', async () => {
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  await expect(() => {
    return provider.perform({
      port: 8888,
      labels: {},
      modelsInfo: [
        {
          id: 'whisper-cpp',
          name: 'Whisper',
          properties: {},
          description: 'whisper desc',
        },
      ],
    });
  }).rejects.toThrowError('The model info file provided is undefined');
});

// perform() must reject models whose backend is not whisper-cpp.
test('provider requires a model with backend type Whisper', async () => {
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  await expect(() => {
    return provider.perform({
      port: 8888,
      labels: {},
      modelsInfo: [
        {
          id: 'whisper-cpp',
          name: 'Whisper',
          properties: {},
          description: 'whisper desc',
          file: {
            file: 'random-file',
            path: 'path-to-file',
          },
          backend: InferenceType.LLAMA_CPP,
        },
      ],
    });
  }).rejects.toThrowError(
    `Whisper requires models with backend type ${InferenceType.WHISPER_CPP} got ${InferenceType.LLAMA_CPP}.`,
  );
});

// config.image, when provided, takes precedence over the default whisper image.
test('custom image in inference server config should overwrite default', async () => {
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  const model = {
    id: 'whisper-cpp',
    name: 'Whisper',
    properties: {},
    description: 'whisper desc',
    file: {
      file: 'random-file',
      path: 'path-to-file',
    },
    backend: InferenceType.WHISPER_CPP,
  };
  await provider.perform({
    port: 8888,
    labels: {
      hello: 'world',
    },
    image: 'localhost/whisper-cpp:custom',
    modelsInfo: [model],
  });
  expect(getImageInfo).toHaveBeenCalledWith(connectionMock, 'localhost/whisper-cpp:custom', expect.any(Function));
});

// Caller-supplied labels must be merged with the inference-server and api
// labels on the returned server description.
test('provider should propagate labels', async () => {
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  const model = {
    id: 'whisper-cpp',
    name: 'Whisper',
    properties: {},
    description: 'whisper desc',
    file: {
      file: 'random-file',
      path: 'path-to-file',
    },
    backend: InferenceType.WHISPER_CPP,
  };
  const server: InferenceServer = await provider.perform({
    port: 8888,
    labels: {
      hello: 'world',
    },
    modelsInfo: [model],
  });
  expect(server).toStrictEqual({
    connection: {
      port: 8888,
    },
    container: {
      containerId: 'dummy-container-id',
      engineId: 'dummy-engine-id',
    },
    labels: {
      'ai-lab-inference-server': '["whisper-cpp"]',
      api: 'http://localhost:8888/inference',
      hello: 'world',
    },
    models: [model],
    status: 'running',
    type: InferenceType.WHISPER_CPP,
  });
});

// An explicit connection in the config must be resolved through
// getContainerProviderConnection instead of picking a running one.
test('provided connection should be used for pulling the image', async () => {
  const connection: ContainerProviderConnectionInfo = {
    name: 'Dummy Podman',
    type: 'podman',
    vmType: VMType.WSL,
    status: 'started',
    providerId: 'fakeProviderId',
  };
  const provider = new WhisperCpp(taskRegistry, podmanConnection);
  const model = {
    id: 'whisper-cpp',
    name: 'Whisper',
    properties: {},
    description: 'whisper desc',
    file: {
      file: 'random-file',
      path: 'path-to-file',
    },
    backend: InferenceType.WHISPER_CPP,
  };
  await provider.perform({
    connection: connection,
    port: 8888,
    labels: {
      hello: 'world',
    },
    image: 'localhost/whisper-cpp:custom',
    modelsInfo: [model],
  });
  expect(getImageInfo).toHaveBeenCalledWith(connectionMock, 'localhost/whisper-cpp:custom', expect.any(Function));
  expect(podmanConnection.getContainerProviderConnection).toHaveBeenCalledWith(connection);
  expect(podmanConnection.findRunningContainerProviderConnection).not.toHaveBeenCalled();
  // ensure the create container is called with appropriate arguments
  expect(containerEngine.createContainer).toHaveBeenCalledWith('dummy-engine-id', {
    Detach: true,
    Env: ['MODEL_PATH=/models/random-file', 'HOST=0.0.0.0', 'PORT=8000'],
    HostConfig: {
      AutoRemove: false,
      Mounts: [
        {
          Source: join('path-to-file', 'random-file'),
          Target: '/models/random-file',
          Type: 'bind',
        },
      ],
      PortBindings: {
        '8000/tcp': [
          {
            HostPort: '8888',
          },
        ],
      },
      SecurityOpt: ['label=disable'],
    },
    Image: 'dummy-image-id',
    Labels: {
      'ai-lab-inference-server': '["whisper-cpp"]',
      api: 'http://localhost:8888/inference',
      hello: 'world',
    },
  });
});

================================================
FILE: packages/backend/src/workers/provider/WhisperCpp.ts
================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { InferenceProvider } from './InferenceProvider'; import type { TaskRegistry } from '../../registries/TaskRegistry'; import type { InferenceServer } from '@shared/models/IInference'; import { InferenceType } from '@shared/models/IInference'; import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig'; import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils'; import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api'; import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils'; import { whispercpp } from '../../assets/inference-images.json'; import type { PodmanConnection } from '../../managers/podmanConnection'; import { getMountPath } from '../../utils/modelsUtils'; export class WhisperCpp extends InferenceProvider { constructor( taskRegistry: TaskRegistry, private podmanConnection: PodmanConnection, ) { super(taskRegistry, InferenceType.WHISPER_CPP, 'Whisper-cpp'); } override enabled(): boolean { return true; } override async perform(config: InferenceServerConfig): Promise { if (config.modelsInfo.length === 0) throw new Error('Need at least one model info to start an inference server.'); const modelInfo = config.modelsInfo[0]; if (modelInfo.file === undefined) { throw new Error('The model info file provided is undefined'); } if (modelInfo.backend !== InferenceType.WHISPER_CPP) { throw new Error( `Whisper requires models with backend type ${InferenceType.WHISPER_CPP} got ${modelInfo.backend}.`, ); } const labels: Record = { ...config.labels, [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)), }; let connection: ContainerProviderConnection | undefined = undefined; if (config.connection) { connection = 
this.podmanConnection.getContainerProviderConnection(config.connection); } else { connection = this.podmanConnection.findRunningContainerProviderConnection(); } if (!connection) throw new Error('no running connection could be found'); // get model mount settings const filename = getMountPath(modelInfo); const target = `/models/${modelInfo.file.file}`; // mount the file directory to avoid adding other files to the containers const mounts: MountConfig = [ { Target: target, Source: filename, Type: 'bind', }, ]; const imageInfo = await this.pullImage(connection, config.image ?? whispercpp.default, labels); const envs: string[] = [`MODEL_PATH=${target}`, 'HOST=0.0.0.0', 'PORT=8000']; labels['api'] = `http://localhost:${config.port}/inference`; const containerInfo = await this.createContainer( imageInfo.engineId, { Image: imageInfo.Id, Detach: true, Labels: labels, HostConfig: { AutoRemove: false, Mounts: mounts, PortBindings: { '8000/tcp': [ { HostPort: `${config.port}`, }, ], }, SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION], }, Env: envs, }, labels, ); return { models: [modelInfo], status: 'running', connection: { port: config.port, }, container: { containerId: containerInfo.id, engineId: containerInfo.engineId, }, type: InferenceType.WHISPER_CPP, labels: labels, }; } override dispose(): void {} } ================================================ FILE: packages/backend/src/workers/uploader/UploaderOptions.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import type { ModelInfo } from '@shared/models/IModelInfo'; import type { ContainerProviderConnection } from '@podman-desktop/api'; export interface UploaderOptions { model: ModelInfo; connection: ContainerProviderConnection; } ================================================ FILE: packages/backend/src/workers/uploader/WSLUploader.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { expect, test, describe, vi, beforeEach } from 'vitest'; import { WSLUploader } from './WSLUploader'; import type { ModelInfo } from '@shared/models/IModelInfo'; import { configuration, env, process, type ContainerProviderConnection, type RunResult } from '@podman-desktop/api'; import { VMType } from '@shared/models/IPodman'; vi.mock('@podman-desktop/api', () => ({ env: { isWindows: false, }, process: { exec: vi.fn(), }, configuration: { getConfiguration: vi.fn(), }, })); const connectionMock: ContainerProviderConnection = { name: 'machine2', type: 'podman', status: () => 'started', vmType: VMType.WSL, endpoint: { socketPath: 'socket.sock', }, }; const wslUploader = new WSLUploader(); beforeEach(() => { vi.resetAllMocks(); vi.mocked(configuration.getConfiguration).mockReturnValue({ get: () => 'podman.exe', has: vi.fn(), update: vi.fn(), }); }); describe('canUpload', () => { test('should return false if system is not windows', () => { vi.mocked(env).isWindows = false; const result = wslUploader.enabled(); expect(result).toBeFalsy(); }); test('should return true if system is windows', () => { vi.mocked(env).isWindows = true; const result = wslUploader.enabled(); expect(result).toBeTruthy(); }); }); describe('upload', () => { test('throw if localpath is not defined', async () => { await expect( wslUploader.perform({ connection: connectionMock, model: { file: undefined, } as unknown as ModelInfo, }), ).rejects.toThrowError('model is not available locally.'); }); test('non-WSL VMType should return the original path', async () => { vi.mocked(process.exec).mockRejectedValueOnce('error'); const result = await wslUploader.perform({ connection: { ...connectionMock, vmType: VMType.UNKNOWN, }, model: { id: 'dummyId', file: { path: 'C:\\Users\\podman\\folder', file: 'dummy.guff' }, } as unknown as ModelInfo, }); expect(process.exec).not.toHaveBeenCalled(); 
expect(result.startsWith('C:\\Users\\podman\\folder')).toBeTruthy(); }); test('copy model if not exists on podman machine', async () => { vi.mocked(process.exec).mockRejectedValueOnce('error'); await wslUploader.perform({ connection: connectionMock, model: { id: 'dummyId', file: { path: 'C:\\Users\\podman\\folder', file: 'dummy.guff' }, } as unknown as ModelInfo, }); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'stat', '/home/user/ai-lab/models/dummyId', ]); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'mkdir', '-p', '/home/user/ai-lab/models', ]); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'cp', '-r', '-L', '/mnt/c/Users/podman/folder/dummy.guff', '/home/user/ai-lab/models/dummyId', ]); }); test('copy model if not exists on podman machine with space handling', async () => { vi.mocked(process.exec).mockRejectedValueOnce('error'); await wslUploader.perform({ connection: connectionMock, model: { id: 'dummyId', file: { path: 'C:\\Users\\podman folder', file: 'dummy.guff' }, } as unknown as ModelInfo, }); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'stat', '/home/user/ai-lab/models/dummyId', ]); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'mkdir', '-p', '/home/user/ai-lab/models', ]); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'cp', '-r', '-L', '/mnt/c/Users/podman\\ folder/dummy.guff', '/home/user/ai-lab/models/dummyId', ]); }); test('do not copy model if it exists on podman machine', async () => { vi.mocked(process.exec).mockResolvedValue({} as RunResult); await wslUploader.perform({ connection: connectionMock, model: { id: 'dummyId', file: { path: 'C:\\Users\\podman\\folder', file: 'dummy.guff' }, } as unknown as ModelInfo, }); expect(process.exec).toBeCalledWith('podman.exe', [ 'machine', 'ssh', 'machine2', 'stat', 
'/home/user/ai-lab/models/dummyId', ]); expect(process.exec).toBeCalledTimes(1); }); }); ================================================ FILE: packages/backend/src/workers/uploader/WSLUploader.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import * as podmanDesktopApi from '@podman-desktop/api'; import { getPodmanCli, getPodmanMachineName } from '../../utils/podman'; import { getLocalModelFile, getRemoteModelFile, isModelUploaded } from '../../utils/modelsUtils'; import { WindowsWorker } from '../WindowsWorker'; import { VMType } from '@shared/models/IPodman'; import type { UploaderOptions } from './UploaderOptions'; import { escapeSpaces } from '../../utils/pathUtils'; import { dirname } from 'node:path'; export class WSLUploader extends WindowsWorker { async perform(options: UploaderOptions): Promise { const localPath = getLocalModelFile(options.model); // ensure the connection type is WSL if (options.connection.vmType !== VMType.WSL) { console.warn('cannot upload on non-WSL machine'); return localPath; } // the connection name cannot be used as it is const machineName = getPodmanMachineName(options.connection); const driveLetter = localPath.charAt(0); const convertToMntPath = escapeSpaces( 
localPath.replace(`${driveLetter}:\\`, `/mnt/${driveLetter.toLowerCase()}/`).replace(/\\/g, '/'), ); // check if model already loaded on the podman machine const existsRemote = await isModelUploaded(machineName, options.model); const remoteFile = escapeSpaces(getRemoteModelFile(options.model)); const baseFolder = dirname(remoteFile); // if not exists remotely it copies it from the local path if (!existsRemote) { await podmanDesktopApi.process.exec(getPodmanCli(), ['machine', 'ssh', machineName, 'mkdir', '-p', baseFolder]); await podmanDesktopApi.process.exec(getPodmanCli(), [ 'machine', 'ssh', machineName, 'cp', '-r', '-L', convertToMntPath, remoteFile, ]); } return remoteFile; } } ================================================ FILE: packages/backend/tsconfig.json ================================================ { "compilerOptions": { "target": "esnext", "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "lib": ["ES2022", "webworker", "dom"], "sourceMap": true, "outDir": "dist", "allowSyntheticDefaultImports": true, "skipLibCheck": true, "types": ["node"], "paths": { "@shared/*": ["../shared/src/*"] }, "strict": true, "noImplicitOverride": true, "noImplicitReturns": true, "noUnusedLocals": true }, "include": ["src", "types/*.d.ts", "../../types/*.d.ts", "../shared/*.ts", "../shared/**/*.ts"] } ================================================ FILE: packages/backend/vite.config.js ================================================ /********************************************************************** * Copyright (C) 2023 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import { join, resolve } from 'node:path'; import { builtinModules } from 'module'; import { existsSync } from 'node:fs'; import replace from '@rollup/plugin-replace'; import { cp, mkdir } from 'node:fs/promises'; const PACKAGE_ROOT = __dirname; /** * @type {import('vite').UserConfig} * @see https://vitejs.dev/config/ */ const config = { mode: process.env.MODE, root: PACKAGE_ROOT, envDir: process.cwd(), resolve: { alias: { '/@/': join(PACKAGE_ROOT, 'src') + '/', '/@gen/': join(PACKAGE_ROOT, 'src-generated') + '/', '@shared/': join(PACKAGE_ROOT, '../shared', 'src') + '/', }, mainFields: ['module', 'jsnext:main', 'jsnext'], //https://github.com/vitejs/vite/issues/16444 }, build: { sourcemap: 'inline', target: 'esnext', outDir: 'dist', assetsDir: '.', minify: process.env.MODE === 'production' ? 
'esbuild' : false, lib: { entry: 'src/extension.ts', formats: ['cjs'], }, rollupOptions: { external: ['@podman-desktop/api', ...builtinModules.flatMap(p => [p, `node:${p}`])], output: { entryFileNames: '[name].cjs', }, plugins: [ { // copy the swagger-ui-dist files to the dist folder as we need the files to be served name: 'copy-swagger-ui', async buildStart() { const start = performance.now(); const source = resolve('../../node_modules/swagger-ui-dist'); const destination = resolve('dist/swagger-ui'); // Ensure destination directory exists if (!existsSync(destination)) { await mkdir(destination, { recursive: true }); } // Copy files await cp(source, destination, { recursive: true, filter: source => !source.includes('.map'), }); console.info(`Swagger UI files copied in ${Math.round(performance.now() - start)}ms to dist/swagger-ui`); }, }, ], }, emptyOutDir: false, reportCompressedSize: false, }, plugins: [ // This is to apply the patch https://github.com/JS-DevTools/ono/pull/20 // can be removed when the patch is merged replace({ delimiters: ['', ''], preventAssignment: true, values: { 'if (typeof module === "object" && typeof module.exports === "object") {': 'if (typeof module === "object" && typeof module.exports === "object" && typeof module.exports.default === "object") {', }, }), ], }; export default config; ================================================ FILE: packages/backend/vitest.config.js ================================================ /********************************************************************** * Copyright (C) 2023 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import path from 'node:path'; import { join } from 'path'; const PACKAGE_ROOT = __dirname; const config = { test: { include: ['**/*.{test,spec}.?(c|m)[jt]s?(x)', '../shared/**/*.{test,spec}.?(c|m)[jt]s?(x)'], coverage: { provider: 'v8', reporter: ['lcov', 'text'], extension: '.ts', }, }, resolve: { alias: { '@podman-desktop/api': path.resolve(__dirname, '__mocks__/@podman-desktop/api.js'), '/@/': join(PACKAGE_ROOT, 'src') + '/', '/@gen/': join(PACKAGE_ROOT, 'src-generated') + '/', '@shared/': join(PACKAGE_ROOT, '../shared', 'src') + '/', }, }, }; export default config; ================================================ FILE: packages/frontend/index.html ================================================ Podman Desktop
================================================ FILE: packages/frontend/package.json ================================================ { "name": "frontend-app", "displayName": "UI for AI Lab", "version": "1.10.0-next", "type": "module", "license": "Apache-2.0", "scripts": { "preview": "vite preview", "build": "vite build", "test": "vitest run --coverage", "test:watch": "vitest watch --coverage", "watch": "vite --mode development build -w" }, "dependencies": { "@fortawesome/fontawesome-free": "^7.2.0", "@fortawesome/free-brands-svg-icons": "^7.2.0", "@fortawesome/free-solid-svg-icons": "^7.2.0", "@fortawesome/free-regular-svg-icons": "^7.2.0", "@podman-desktop/ui-svelte": "1.21.0", "tinro": "^0.6.12", "filesize": "^11.0.17", "humanize-duration": "^3.33.2", "moment": "^2.30.1", "semver": "^7.7.4" }, "devDependencies": { "@sveltejs/vite-plugin-svelte": "6.2.4", "@tailwindcss/typography": "^0.5.19", "@tailwindcss/vite": "^4.2.4", "@testing-library/dom": "^10.4.1", "@testing-library/jest-dom": "^6.9.1", "@testing-library/svelte": "^5.3.1", "@testing-library/user-event": "^14.6.1", "@tsconfig/svelte": "^5.0.8", "@types/humanize-duration": "^3.27.4", "@typescript-eslint/eslint-plugin": "8.59.1", "jsdom": "^29.1.0", "monaco-editor": "^0.55.1", "postcss": "^8.5.12", "postcss-load-config": "^6.0.1", "svelte": "5.55.5", "svelte-fa": "^4.0.4", "svelte-select": "^5.8.3", "svelte-markdown": "^0.4.1", "svelte-preprocess": "^6.0.3", "tailwindcss": "^4.2.4", "vitest": "^3.0.5" } } ================================================ FILE: packages/frontend/src/App.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { vi, beforeEach, test, expect } from 'vitest'; import { render } from '@testing-library/svelte'; import App from '/@/App.svelte'; import { getRouterState, rpcBrowser } from '/@/utils/client'; import { configuration } from '/@/stores/extensionConfiguration'; import { MSG_NAVIGATION_ROUTE_UPDATE } from '@shared/Messages'; vi.mock('tinro', () => ({ router: { goto: vi.fn(), mode: { hash: vi.fn(), }, location: { query: new Map(), }, }, })); // mock monaco vi.mock('/@/lib/monaco-editor/MonacoEditor.svelte'); vi.mock('./stores/extensionConfiguration.ts', () => ({ configuration: { subscribe: vi.fn(), }, })); vi.mock('/@/lib/RecipeCardTags', () => ({ isDarkMode: vi.fn().mockReturnValue(false), })); vi.mock('./utils/client', async () => ({ studioClient: { getExtensionConfiguration: vi.fn(), }, instructlabClient: {}, rpcBrowser: { subscribe: vi.fn(), }, getRouterState: vi.fn(), saveRouterState: vi.fn(), })); beforeEach(() => { vi.resetAllMocks(); vi.mocked(getRouterState).mockResolvedValue({ url: '/' }); vi.mocked(rpcBrowser.subscribe).mockReturnValue({ unsubscribe: vi.fn() }); vi.mocked(configuration.subscribe).mockReturnValue(vi.fn()); }); test('should subscribe to navigation update route on mount', async () => { render(App, {}); await vi.waitFor(() => { expect(rpcBrowser.subscribe).toHaveBeenCalledWith(MSG_NAVIGATION_ROUTE_UPDATE, expect.any(Function)); }); }); ================================================ 
FILE: packages/frontend/src/App.svelte ================================================
{#if meta.params.id === 'create'} {:else} {/if} {#if experimentalTuning} {/if} {#if meta.params.id === 'create'} {:else} {/if}
================================================ FILE: packages/frontend/src/Route.svelte ================================================ {#if showContent} {/if} ================================================ FILE: packages/frontend/src/app.css ================================================ @import 'tailwindcss'; @config '../tailwind.config.cjs'; ================================================ FILE: packages/frontend/src/index.html ================================================ AI Lab
================================================ FILE: packages/frontend/src/lib/ApplicationActions.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { expect, test, vi, beforeEach, describe } from 'vitest'; import { render, screen, fireEvent } from '@testing-library/svelte'; import { studioClient } from '../utils/client'; import ApplicationActions from '/@/lib/ApplicationActions.svelte'; import type { ApplicationState } from '@shared/models/IApplicationState'; import { router } from 'tinro'; vi.mock('../utils/client', async () => ({ studioClient: { requestStopApplication: vi.fn(), requestStartApplication: vi.fn(), requestRemoveApplication: vi.fn(), requestRestartApplication: vi.fn(), requestOpenApplication: vi.fn(), }, })); beforeEach(() => { vi.resetAllMocks(); vi.mocked(studioClient.requestStopApplication).mockResolvedValue(undefined); vi.mocked(studioClient.requestStartApplication).mockResolvedValue(undefined); vi.mocked(studioClient.requestRemoveApplication).mockResolvedValue(undefined); vi.mocked(studioClient.requestRestartApplication).mockResolvedValue(undefined); vi.mocked(studioClient.requestOpenApplication).mockResolvedValue(undefined); }); 
test('deletion action should call requestRemoveApplication', async () => { render(ApplicationActions, { object: { pod: { Containers: [], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const deleteBtn = screen.getByTitle('Delete AI App'); expect(deleteBtn).toBeVisible(); await fireEvent.click(deleteBtn); expect(studioClient.requestRemoveApplication).toHaveBeenCalledWith('dummy-recipe-id', 'dummy-model-id'); }); describe('open action', () => { test('open action should call requestOpenApplication', async () => { render(ApplicationActions, { object: { pod: { Containers: [ { Status: 'running', }, ], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const openBtn = screen.getByTitle('Open AI App'); expect(openBtn).toBeVisible(); await fireEvent.click(openBtn); expect(studioClient.requestOpenApplication).toHaveBeenCalledWith('dummy-recipe-id', 'dummy-model-id'); }); test('open action should not be visible when all container exited', async () => { render(ApplicationActions, { object: { pod: { Containers: [ { Status: 'exited', }, ], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const openBtn = screen.queryByTitle('Open AI App'); expect(openBtn).toBeNull(); }); }); describe('start action', () => { test('start action should be visible when all container exited', async () => { render(ApplicationActions, { object: { pod: { Containers: [ { Status: 'exited', }, ], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const startBtn = screen.getByTitle('Start AI App'); expect(startBtn).toBeDefined(); await fireEvent.click(startBtn); expect(studioClient.requestStartApplication).toHaveBeenCalledWith('dummy-recipe-id', 'dummy-model-id'); }); test('start action should be hidden when one container is not exited', async () => { render(ApplicationActions, { object: { pod: { Containers: [ { Status: 
'exited', }, { Status: 'running', }, ], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const startBtn = screen.queryByTitle('Start AI App'); expect(startBtn).toBeNull(); }); }); test('restart action should call requestRestartApplication', async () => { render(ApplicationActions, { object: { pod: { Containers: [], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const restartBtn = screen.getByTitle('Restart AI App'); expect(restartBtn).toBeVisible(); await fireEvent.click(restartBtn); expect(studioClient.requestRestartApplication).toHaveBeenCalledWith('dummy-recipe-id', 'dummy-model-id'); }); test('open recipe action should redirect to recipe page', async () => { const routerSpy = vi.spyOn(router, 'goto'); render(ApplicationActions, { object: { pod: { Containers: [], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', enableGoToRecipeAction: true, }); const openRecipeBtn = screen.getByTitle('Open Recipe'); expect(openRecipeBtn).toBeVisible(); await fireEvent.click(openRecipeBtn); expect(routerSpy).toHaveBeenCalledWith('/recipe/dummy-recipe-id'); }); test('open recipe action should not be visible by default', async () => { render(ApplicationActions, { object: { pod: { Containers: [], }, } as unknown as ApplicationState, recipeId: 'dummy-recipe-id', modelId: 'dummy-model-id', }); const openRecipeBtn = screen.getByTitle('Open Recipe'); expect(openRecipeBtn).toHaveClass('hidden'); }); ================================================ FILE: packages/frontend/src/lib/ApplicationActions.svelte ================================================ {#if object?.pod !== undefined} {#if exited} {:else} {/if} {/if} ================================================ FILE: packages/frontend/src/lib/Badge.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 
Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { expect, test } from 'vitest'; import { render, screen } from '@testing-library/svelte'; import Badge from './Badge.svelte'; import { faTrash } from '@fortawesome/free-solid-svg-icons'; test('print Badge with custom text and default background', async () => { render(Badge, { icon: faTrash, content: 'custom-text' }); const badgeContent = screen.getByText('custom-text'); expect(badgeContent).toBeInTheDocument(); expect(badgeContent).toHaveClass('bg-[var(--pd-label-bg)]'); }); test('print Badge with custom text and custom background', async () => { render(Badge, { icon: faTrash, content: 'custom-text', class: 'bg-[var(--pd-label-text)]' }); const badgeContent = screen.getByText('custom-text'); expect(badgeContent).toBeInTheDocument(); expect(badgeContent).toHaveClass('bg-[var(--pd-label-text)]'); expect(badgeContent).not.toHaveClass('bg-[var(--pd-label-bg)]'); }); ================================================ FILE: packages/frontend/src/lib/Badge.svelte ================================================
{#if icon} {/if} {content}
================================================ FILE: packages/frontend/src/lib/Card.svelte ================================================
{#if icon} {/if}
{#if title}
{title}
{/if} {#if description}
{description}
{/if}
================================================ FILE: packages/frontend/src/lib/ContentDetailsLayout.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { expect, test } from 'vitest'; import ContentDetailsLayoutTest from './ContentDetailsLayoutTest.svelte'; import { render, screen } from '@testing-library/svelte'; import userEvent from '@testing-library/user-event'; test('should open/close details panel when clicking on toggle button', async () => { render(ContentDetailsLayoutTest); const panelOpenDetails = screen.getByLabelText('toggle a label'); expect(panelOpenDetails).toHaveClass('hidden'); const panelAppDetails = screen.getByLabelText('a label panel'); expect(panelAppDetails).toHaveClass('block'); const btnShowPanel = screen.getByRole('button', { name: 'show a label' }); const btnHidePanel = screen.getByRole('button', { name: 'hide a label' }); await userEvent.click(btnHidePanel); expect(panelAppDetails).toHaveClass('hidden'); expect(panelOpenDetails).toHaveClass('block'); await userEvent.click(btnShowPanel); expect(panelAppDetails).toHaveClass('block'); expect(panelOpenDetails).toHaveClass('hidden'); }); ================================================ 
FILE: packages/frontend/src/lib/ContentDetailsLayout.svelte ================================================
{detailsTitle}
================================================ FILE: packages/frontend/src/lib/ContentDetailsLayoutTest.svelte ================================================ A Content Details... ================================================ FILE: packages/frontend/src/lib/ExpandableMessage.svelte ================================================ {#if message}
{message}
{/if} ================================================ FILE: packages/frontend/src/lib/FlatMenu.svelte ================================================ ================================================ FILE: packages/frontend/src/lib/Navigation.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { test, expect, vi } from 'vitest'; import { screen, render } from '@testing-library/svelte'; import Navigation from './Navigation.svelte'; import type { TinroRouteMeta } from 'tinro'; vi.mock('../utils/client', async () => ({ studioClient: { getExtensionConfiguration: vi.fn().mockResolvedValue({}), }, rpcBrowser: { subscribe: (): unknown => { return { unsubscribe: (): void => {}, }; }, }, })); test('Expect panel to have correct styling', async () => { render(Navigation, { meta: { url: 'test' } as TinroRouteMeta }); const panel = screen.getByLabelText('PreferencesNavigation'); expect(panel).toBeInTheDocument(); expect(panel).toHaveClass('bg-[var(--pd-secondary-nav-bg)]'); expect(panel).toHaveClass('border-[var(--pd-global-nav-bg-border)]'); expect(panel).toHaveClass('border-r-[1px]'); }); ================================================ FILE: 
packages/frontend/src/lib/Navigation.svelte ================================================ ================================================ FILE: packages/frontend/src/lib/RangeInput.svelte ================================================
{name}
================================================ FILE: packages/frontend/src/lib/RecipeCard.spec.ts ================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import '@testing-library/jest-dom/vitest';

import { vi, test, expect, beforeEach, beforeAll } from 'vitest';
import { screen, render } from '@testing-library/svelte';
import { findLocalRepositoryByRecipeId } from '/@/utils/localRepositoriesUtils';
import RecipeCard from './RecipeCard.svelte';
import { writable, type Writable } from 'svelte/store';
import type { LocalRepository } from '@shared/models/ILocalRepository';
import { localRepositories } from '../stores/localRepositories';

vi.mock('/@/utils/localRepositoriesUtils', () => ({
  findLocalRepositoryByRecipeId: vi.fn(),
}));

vi.mock('../stores/localRepositories', () => ({
  localRepositories: {
    subscribe: vi.fn(),
    unsubscribe: vi.fn(),
  },
}));

vi.mock('../utils/client', async () => {
  return {
    studioClient: {},
  };
});

// fix: the generic parameter was missing — bare `Writable` is not a valid type.
// The store backs the localRepositories subscription below, so it holds
// LocalRepository[] (matching the `LocalRepository` type import).
const mockLocalRepositories: Writable<LocalRepository[]> = writable([]);

// Minimal recipe fixture shared by both tests.
const recipe = {
  id: 'recipe 1',
  name: 'Recipe 1',
  readme: 'readme 1',
  categories: [],
  recommended: ['model1', 'model2'],
  description: 'description 1',
  repository: 'repo 1',
};

// jsdom has no ResizeObserver; stub it so the component can mount.
class ResizeObserver {
  observe = vi.fn();
  disconnect = vi.fn();
  unobserve = vi.fn();
}

beforeAll(() => {
  Object.defineProperty(window, 'ResizeObserver', { value: ResizeObserver });
});

vi.mock('/@/lib/RecipeCardTags', () => ({
  isDarkMode: vi.fn().mockReturnValue(false),
}));

beforeEach(() => {
  vi.resetAllMocks();
  // Route the mocked store's subscribe through the writable fixture above.
  vi.mocked(localRepositories).subscribe.mockImplementation(run => mockLocalRepositories.subscribe(run));
});

test('recipe name and description', async () => {
  // eslint-disable-next-line sonarjs/publicly-writable-directories
  vi.mocked(findLocalRepositoryByRecipeId).mockReturnValue({ path: 'recipe1', sourcePath: '/tmp/recipe1', labels: {} });
  render(RecipeCard, {
    recipe,
  });
  const name = screen.queryByLabelText('Recipe 1 name');
  expect(name).toBeInTheDocument();
  const description = screen.queryByLabelText('Recipe 1 description');
  expect(description).toBeInTheDocument();
  // No `ref` on the recipe, so the ref badge must not render.
  const reference = screen.queryByLabelText('Recipe 1 ref');
  expect(reference).not.toBeInTheDocument();
});

test('recipe name, description and reference', async () => {
  // eslint-disable-next-line sonarjs/publicly-writable-directories
  vi.mocked(findLocalRepositoryByRecipeId).mockReturnValue({ path: 'recipe1', sourcePath: '/tmp/recipe1', labels: {} });
  render(RecipeCard, {
    recipe: { ...recipe, ref: 'myref' },
  });
  const name = screen.queryByLabelText('Recipe 1 name');
  expect(name).toBeInTheDocument();
  const description = screen.queryByLabelText('Recipe 1 description');
  expect(description).toBeInTheDocument();
  const reference = screen.queryByLabelText('Recipe 1 ref');
  expect(reference).toBeInTheDocument();
});

================================================ FILE: packages/frontend/src/lib/RecipeCard.svelte ================================================
{recipe.name} {recipe.description}
{#if recipe.ref} {recipe.ref} {/if}
================================================ FILE: packages/frontend/src/lib/RecipeCardTags.spec.ts ================================================ /********************************************************************** * Copyright (C) 2025 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { screen, render } from '@testing-library/svelte'; import { beforeAll, expect, test, vi } from 'vitest'; import RecipeCardTags from '/@/lib/RecipeCardTags.svelte'; import userEvent from '@testing-library/user-event'; const recipe = { id: 'recipe1', name: 'recipe1', description: 'description', repository: 'repository', readme: 'readme', categories: ['natural-language-processing', 'audio'], languages: ['java', 'python'], frameworks: ['langchain', 'vectordb'], backend: 'whisper-cpp', }; class ResizeObserver { observe = vi.fn(); disconnect = vi.fn(); unobserve = vi.fn(); } vi.mock('/@/lib/RecipeCardTags', () => ({ getBGColor: vi.fn((_: string) => 'bg-purple-200'), getTextColor: vi.fn((_: string) => 'text-purple-200'), FRAMEWORKS: ['langchain', 'vectordb'], TOOLS: ['whisper-cpp'], })); beforeAll(() => { Object.defineProperty(window, 'ResizeObserver', { value: ResizeObserver }); }); test('Should render tags', () => { render(RecipeCardTags, { recipe: recipe }); const category1 = screen.getByText('Natural Language 
Processing'); expect(category1).toBeVisible(); const category2 = screen.getByText('Audio'); expect(category2).toBeVisible(); const language1 = screen.getByText('Java'); expect(language1).toBeVisible(); const language2 = screen.getByText('Python'); expect(language2).toBeVisible(); const framework1 = screen.getByText('langchain'); expect(framework1).toBeVisible(); const framework2 = screen.getByText('vectordb'); expect(framework2).toBeVisible(); const backend = screen.getByText('whisper-cpp'); expect(backend).toBeVisible(); }); test('Button should be visible with "+ X more"', () => { render(RecipeCardTags, { recipe: recipe }); const button = screen.getByRole('button'); expect(button).toBeVisible(); expect(button).toHaveTextContent('more'); }); test('Clicking on button should show all the tags', async () => { render(RecipeCardTags, { recipe: recipe }); const button = screen.getByRole('button'); expect(button).toBeVisible(); expect(button).toHaveTextContent('more'); // Clicking on the button await userEvent.click(button); expect(button).toHaveTextContent('Show less'); }); ================================================ FILE: packages/frontend/src/lib/RecipeCardTags.svelte ================================================
{#each TAGS as tag, i (i)}
{/each}
================================================ FILE: packages/frontend/src/lib/RecipeCardTags.ts ================================================
/**********************************************************************
 * Copyright (C) 2025 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ***********************************************************************/
import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration';
import { studioClient } from '/@/utils/client';
import { gte } from 'semver';

// Tag vocabularies used by the recipe cards; FRAMEWORKS and TOOLS are exported
// so the card components can classify tags.
const USE_CASES = ['natural-language-processing', 'audio', 'computer-vision'];
const LANGUAGES = ['java', 'javascript', 'python'];
export const FRAMEWORKS = ['langchain', 'langchain4j', 'quarkus', 'react', 'streamlit', 'vectordb', 'llama-stack-sdk'];
export const TOOLS = ['none', 'llama-cpp', 'whisper-cpp', 'llama-stack'];

// Defaulting to Podman Desktop min version we need to run
let version: string = '1.8.0';
let configuration: ExtensionConfiguration;
let isDark = true;

/**
 * Fetches the extension configuration and the Podman Desktop version, then
 * resolves whether the UI is dark: explicit 'dark'/'light' settings win, and
 * 'system' is probed from the computed --pd-terminal-background CSS variable.
 *
 * fix: the return type parameter was missing — bare `Promise` is not a valid
 * type; the function resolves with no value, hence Promise<void>.
 */
async function setupProps(): Promise<void> {
  configuration = await studioClient.getExtensionConfiguration();
  version = (await studioClient.getPodmanDesktopVersion()).toString().replace(/-next/g, '');
  if (configuration.appearance === 'dark') isDark = true;
  else if (configuration.appearance === 'light') isDark = false;
  else if (configuration.appearance === 'system') {
    const app = document.getElementById('app');
    if (!app) throw new Error('cannot found app element');
    const style = window.getComputedStyle(app);
    const color = style.getPropertyValue('--pd-terminal-background').trim();
    isDark = color === '#000';
  }
}
setupProps().catch((e: unknown) => {
  throw new Error(`Got an error when setting up props: ${e}`);
});

// Podman Desktop >= 1.17.0 exposes label CSS variables; older versions fall
// back to hard-coded Tailwind colors chosen by theme.
function getColor(pdColor: string, darkColor: string, lightColor: string): string {
  if (gte(version, '1.17.0')) {
    return pdColor;
  } else {
    if (isDark) return darkColor;
    return lightColor;
  }
}

// fix: the Map generic parameters were missing — bare `Map` is not a valid
// return type; each map associates a tag with a CSS class string.
function createBGColorMap(): Map<string, string> {
  return new Map([
    ...USE_CASES.map(
      useCase =>
        [useCase, getColor('bg-[var(--pd-label-primary-bg)]', 'bg-purple-700', 'bg-purple-300')] as [string, string],
    ),
    ...LANGUAGES.map(
      useCase =>
        [useCase, getColor('bg-[var(--pd-label-secondary-bg)]', 'bg-sky-900', 'bg-sky-200')] as [string, string],
    ),
    ...FRAMEWORKS.map(
      useCase =>
        [useCase, getColor('bg-[var(--pd-label-tertiary-bg)]', 'bg-green-900', 'bg-green-200')] as [string, string],
    ),
    ...TOOLS.map(
      useCase =>
        [useCase, getColor('bg-[var(--pd-label-quaternary-bg)]', 'bg-amber-800', 'bg-amber-100')] as [string, string],
    ),
  ]);
}

function createTextColorMap(): Map<string, string> {
  return new Map([
    ...USE_CASES.map(
      useCase =>
        [useCase, getColor('text-[var(--pd-label-primary-text)]', 'text-purple-300', 'text-purple-700')] as [
          string,
          string,
        ],
    ),
    ...LANGUAGES.map(
      useCase =>
        [useCase, getColor('text-[var(--pd-label-secondary-text)]', 'text-sky-200', 'text-sky-900')] as [
          string,
          string,
        ],
    ),
    ...FRAMEWORKS.map(
      useCase =>
        [useCase, getColor('text-[var(--pd-label-tertiary-text)]', 'text-green-200', 'text-green-900')] as [
          string,
          string,
        ],
    ),
    ...TOOLS.map(
      useCase =>
        [useCase, getColor('text-[var(--pd-label-quaternary-text)]', 'text-amber-400', 'text-amber-900')] as [
          string,
          string,
        ],
    ),
  ]);
}

/** Background CSS class for a tag; unknown tags fall back to the primary/purple style. */
export function getBGColor(tag: string): string {
  const color =
    createBGColorMap().get(tag) ?? getColor('bg-[var(--pd-label-primary-bg)]', 'bg-purple-700', 'bg-purple-300');
  return color;
}

/** Text CSS class for a tag; unknown tags fall back to the primary/purple style. */
export function getTextColor(tag: string): string {
  const color =
    createTextColorMap().get(tag) ??
    getColor('text-[var(--pd-label-primary-text)]', 'text-purple-300', 'text-purple-700');
  return color;
}

================================================ FILE: packages/frontend/src/lib/RecipeDetails.spec.ts ================================================
/**********************************************************************
 * Copyright (C) 2024 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { vi, test, expect, beforeEach } from 'vitest'; import { screen, render } from '@testing-library/svelte'; import userEvent from '@testing-library/user-event'; import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog'; import * as catalogStore from '/@/stores/catalog'; import { readable, writable } from 'svelte/store'; import RecipeDetails from './RecipeDetails.svelte'; import * as tasksStore from '../stores/tasks'; import type { Task } from '@shared/models/ITask'; const mocks = vi.hoisted(() => { return { getLocalRepositoriesMock: vi.fn(), getTasksMock: vi.fn(), openFileMock: vi.fn(), requestDeleteLocalRepositoryMock: vi.fn(), }; }); vi.mock('../utils/client', async () => { return { studioClient: { openFile: mocks.openFileMock, requestDeleteLocalRepository: mocks.requestDeleteLocalRepositoryMock, }, rpcBrowser: { subscribe: (): unknown => { return { unsubscribe: (): void => {}, }; }, }, }; }); vi.mock('/@/stores/tasks', async () => { return { tasks: vi.fn(), }; }); vi.mock('/@/stores/catalog', async () => { return { catalog: vi.fn(), }; }); vi.mock('../stores/localRepositories', () => ({ localRepositories: { subscribe: (f: (msg: unknown) => void) => { f(mocks.getLocalRepositoriesMock()); return (): void => {}; }, }, })); const initialCatalog: ApplicationCatalog = { categories: [], models: [], recipes: [ { id: 'recipe 1', name: 'Recipe 1', readme: 'readme 1', categories: [], recommended: ['model1', 'model2'], description: 'description 1', repository: 'repo 1', }, { id: 'recipe 2', name: 'Recipe 2', readme: 'readme 2', categories: [], description: 'description 2', repository: 'repo 2', }, ], }; beforeEach(() => { vi.resetAllMocks(); mocks.getLocalRepositoriesMock.mockReturnValue([]); const tasksList = writable([]); vi.mocked(tasksStore).tasks = tasksList; 
mocks.openFileMock.mockReturnValue(Promise.resolve()); mocks.requestDeleteLocalRepositoryMock.mockReturnValue(Promise.resolve()); }); test('button vs code should be visible if local repository is not empty', async () => { mocks.getLocalRepositoriesMock.mockReturnValue([ { path: 'random-path', labels: { 'recipe-id': 'recipe 1', }, }, ]); vi.mocked(catalogStore).catalog = readable(initialCatalog); render(RecipeDetails, { recipeId: 'recipe 1', }); const button = screen.getByTitle('Open in VS Code Desktop'); expect(button).toBeDefined(); }); test('local clone and delete local clone buttons should be visible if local repository is not empty', async () => { mocks.getLocalRepositoriesMock.mockReturnValue([ { path: 'random-path', labels: { 'recipe-id': 'recipe 1', }, }, ]); vi.mocked(catalogStore).catalog = readable(initialCatalog); render(RecipeDetails, { recipeId: 'recipe 1', }); const buttonLocalClone = screen.getByRole('button', { name: 'Local clone' }); expect(buttonLocalClone).toBeDefined(); expect(buttonLocalClone).toBeInTheDocument(); await userEvent.click(buttonLocalClone); expect(mocks.openFileMock).toBeCalled(); const buttonDeleteClone = screen.getByTitle('Delete local clone'); expect(buttonDeleteClone).toBeDefined(); expect(buttonDeleteClone).toBeInTheDocument(); await userEvent.click(buttonDeleteClone); expect(mocks.requestDeleteLocalRepositoryMock).toBeCalled(); }); ================================================ FILE: packages/frontend/src/lib/RecipeDetails.svelte ================================================
Repository
{#if localPath}
{/if}
{#if localPath} {/if}
================================================ FILE: packages/frontend/src/lib/RecipeStatus.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { fireEvent, render, screen } from '@testing-library/svelte'; import { expect, test, vi } from 'vitest'; import RecipeStatus from '/@/lib/RecipeStatus.svelte'; import type { Recipe } from '@shared/models/IRecipe'; import { studioClient } from '/@/utils/client'; vi.mock('../utils/client', async () => ({ studioClient: { cloneApplication: vi.fn(), }, })); test('download icon should be visible when localPath is undefined', async () => { render(RecipeStatus, { recipe: {} as unknown as Recipe, localRepository: undefined, }); const icon = screen.getByLabelText('download icon'); expect(icon).toBeDefined(); }); test('chevron down icon should be visible when localPath is defined', async () => { render(RecipeStatus, { recipe: {} as unknown as Recipe, localRepository: { labels: {}, path: 'random-path', sourcePath: 'random-source-path', }, }); const icon = screen.getByLabelText('chevron down icon'); expect(icon).toBeDefined(); }); test('click on download icon should call cloneApplication', async () => { 
vi.mocked(studioClient.cloneApplication).mockResolvedValue(undefined); render(RecipeStatus, { recipe: { id: 'dummy-recipe-id', } as unknown as Recipe, localRepository: undefined, }); const button = screen.getByRole('button'); await fireEvent.click(button); await vi.waitFor(() => { expect(studioClient.cloneApplication).toHaveBeenCalledWith('dummy-recipe-id'); }); }); ================================================ FILE: packages/frontend/src/lib/RecipeStatus.svelte ================================================ {#key loading} {#if localRepository}
{:else} {/if} {loading ? 'Cloning...' : localRepository ? 'Recipe cloned' : 'Clone recipe'}
{/key} ================================================ FILE: packages/frontend/src/lib/RecipesCard.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { render, screen } from '@testing-library/svelte'; import { beforeAll, expect, test, vi } from 'vitest'; import RecipesCard from '/@/lib/RecipesCard.svelte'; vi.mock('../utils/client', async () => ({ studioClient: {}, })); vi.mock('../stores/localRepositories', () => ({ localRepositories: { subscribe: (f: (msg: unknown) => void) => { f([]); return (): void => {}; }, }, })); class ResizeObserver { observe = vi.fn(); disconnect = vi.fn(); unobserve = vi.fn(); } beforeAll(() => { Object.defineProperty(window, 'ResizeObserver', { value: ResizeObserver }); }); vi.mock('/@/lib/RecipeCardTags', () => ({ isDarkMode: vi.fn().mockReturnValue(false), })); test('recipes card without recipes should display empty message', async () => { render(RecipesCard, { recipes: [], category: { id: 'dummy-category', name: 'Dummy category', }, }); const message = screen.getByText('There is no recipe in this category for now ! 
Come back later'); expect(message).toBeDefined(); }); test('recipes card with recipes should display them', async () => { render(RecipesCard, { recipes: [ { id: 'recipe1', name: 'Recipe 1', models: ['model1'], categories: [], description: 'Recipe 1', readme: '', repository: 'https://recipe-1', }, ], category: { id: 'dummy-category', name: 'Dummy category', }, }); const text = screen.getAllByText('Recipe 1'); expect(text.length).toBeGreaterThan(0); }); ================================================ FILE: packages/frontend/src/lib/RecipesCard.svelte ================================================
{#if recipes.length === 0}
There is no recipe in this category for now ! Come back later
{/if}
{#each recipes as recipe (recipe.id)} {/each}
================================================ FILE: packages/frontend/src/lib/button/CopyButton.spec.ts ================================================ /********************************************************************** * Copyright (C) 2024 Red Hat, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 ***********************************************************************/ import '@testing-library/jest-dom/vitest'; import { expect, test, vi, beforeEach, describe } from 'vitest'; import { render, within, fireEvent, waitFor } from '@testing-library/svelte'; import CopyButton from '/@/lib/button/CopyButton.svelte'; import { studioClient } from '/@/utils/client'; vi.mock('../../utils/client', async () => ({ studioClient: { copyToClipboard: vi.fn(), }, })); beforeEach(() => { vi.resetAllMocks(); vi.mocked(studioClient.copyToClipboard).mockResolvedValue(undefined); }); test('clicking on the content should call copyToClipboard', async () => { const { container } = render(CopyButton, { content: 'dummy-content', }); const cpyButton = within(container).getByRole('button'); expect(cpyButton).toBeDefined(); await fireEvent.click(cpyButton); await waitFor(() => { expect(studioClient.copyToClipboard).toHaveBeenCalledWith('dummy-content'); }); }); describe('tooltips properties should be propagated', () => { test('top property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', top: true, }); const toolTip = 
container.querySelector('.tooltip.top'); expect(toolTip).toBeDefined(); }); test('topLeft property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', topLeft: true, }); const toolTip = container.querySelector('.tooltip.top-left'); expect(toolTip).toBeDefined(); }); test('topRight property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', topRight: true, }); const toolTip = container.querySelector('.tooltip.top-right'); expect(toolTip).toBeDefined(); }); test('right property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', right: true, }); const toolTip = container.querySelector('.tooltip.right'); expect(toolTip).toBeDefined(); }); test('bottom property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', bottom: true, }); const toolTip = container.querySelector('.tooltip.bottom'); expect(toolTip).toBeDefined(); }); test('bottomLeft property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', bottomLeft: true, }); const toolTip = container.querySelector('.tooltip.bottom-left'); expect(toolTip).toBeDefined(); }); test('bottomRight property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', bottomLeft: true, }); const toolTip = container.querySelector('.tooltip.bottom-right'); expect(toolTip).toBeDefined(); }); test('left property', async () => { const { container } = render(CopyButton, { content: 'dummy-content', left: true, }); const toolTip = container.querySelector('.tooltip.left'); expect(toolTip).toBeDefined(); }); }); ================================================ FILE: packages/frontend/src/lib/button/CopyButton.svelte ================================================ ================================================ FILE: packages/frontend/src/lib/button/ListItemButtonIcon.svelte ================================================ {#if menu}