Repository: containers/podman-desktop-extension-ai-lab
Branch: main
Commit: 28796a6eff7c
Files: 457
Total size: 2.2 MB
Directory structure:
gitextract_r3viv2ck/
├── .dockerignore
├── .editorconfig
├── .fmf/
│ └── version
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── epic.yml
│ │ ├── feature_request.yml
│ │ └── ux-request.yaml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── ai-lab-e2e-nightly-windows.yaml
│ ├── build-next.yaml
│ ├── compute-model-sizes.yml
│ ├── e2e-main-tf.yaml
│ ├── e2e-main.yaml
│ ├── llama-stack-playground.yaml
│ ├── pr-check.yaml
│ ├── ramalama.yaml
│ ├── recipe-catalog-change-cleanup.yaml
│ ├── recipe-catalog-change-template.yaml
│ ├── recipe-catalog-change-trigger.yaml
│ ├── release.yaml
│ ├── update-ramalama-references.sh
│ └── update-ramalama-references.yaml
├── .gitignore
├── .husky/
│ ├── commit-msg
│ └── pre-commit
├── .npmrc
├── .prettierrc
├── .vscode/
│ └── settings.json
├── CODE-OF-CONDUCT.md
├── Containerfile
├── LICENSE
├── MIGRATION.md
├── PACKAGING-GUIDE.md
├── README.md
├── RELEASE.md
├── SECURITY.md
├── USAGE_DATA.md
├── api/
│ └── openapi.yaml
├── clean.sh
├── commitlint.config.js
├── docs/
│ └── proposals/
│ ├── ai-studio.md
│ └── state-management.md
├── eslint.config.mjs
├── package.json
├── packages/
│ ├── backend/
│ │ ├── .gitignore
│ │ ├── __mocks__/
│ │ │ └── @podman-desktop/
│ │ │ └── api.js
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── assets/
│ │ │ │ ├── ai.json
│ │ │ │ ├── inference-images.json
│ │ │ │ ├── instructlab-images.json
│ │ │ │ ├── llama-stack-images.json
│ │ │ │ ├── llama-stack-playground-images.json
│ │ │ │ └── openai.json
│ │ │ ├── extension.spec.ts
│ │ │ ├── extension.ts
│ │ │ ├── instructlab-api-impl.ts
│ │ │ ├── llama-stack-api-impl.ts
│ │ │ ├── managers/
│ │ │ │ ├── GPUManager.spec.ts
│ │ │ │ ├── GPUManager.ts
│ │ │ │ ├── SnippetManager.spec.ts
│ │ │ │ ├── SnippetManager.ts
│ │ │ │ ├── TaskRunner.spec.ts
│ │ │ │ ├── TaskRunner.ts
│ │ │ │ ├── apiServer.spec.ts
│ │ │ │ ├── apiServer.ts
│ │ │ │ ├── application/
│ │ │ │ │ ├── applicationManager.spec.ts
│ │ │ │ │ └── applicationManager.ts
│ │ │ │ ├── catalogManager.spec.ts
│ │ │ │ ├── catalogManager.ts
│ │ │ │ ├── gitManager.spec.ts
│ │ │ │ ├── gitManager.ts
│ │ │ │ ├── inference/
│ │ │ │ │ ├── inferenceManager.spec.ts
│ │ │ │ │ └── inferenceManager.ts
│ │ │ │ ├── instructlab/
│ │ │ │ │ ├── instructlabManager.spec.ts
│ │ │ │ │ └── instructlabManager.ts
│ │ │ │ ├── llama-stack/
│ │ │ │ │ ├── llamaStackManager.spec.ts
│ │ │ │ │ └── llamaStackManager.ts
│ │ │ │ ├── modelsManager.spec.ts
│ │ │ │ ├── modelsManager.ts
│ │ │ │ ├── monitoringManager.spec.ts
│ │ │ │ ├── monitoringManager.ts
│ │ │ │ ├── playground/
│ │ │ │ │ ├── McpServerManager.spec.ts
│ │ │ │ │ ├── McpServerManager.ts
│ │ │ │ │ ├── aiSdk.spec.ts
│ │ │ │ │ └── aiSdk.ts
│ │ │ │ ├── playgroundV2Manager.spec.ts
│ │ │ │ ├── playgroundV2Manager.ts
│ │ │ │ ├── podmanConnection.spec.ts
│ │ │ │ ├── podmanConnection.ts
│ │ │ │ ├── recipes/
│ │ │ │ │ ├── BuilderManager.spec.ts
│ │ │ │ │ ├── BuilderManager.ts
│ │ │ │ │ ├── PodManager.spec.ts
│ │ │ │ │ ├── PodManager.ts
│ │ │ │ │ ├── RecipeManager.spec.ts
│ │ │ │ │ └── RecipeManager.ts
│ │ │ │ └── snippets/
│ │ │ │ ├── java-okhttp-snippet.spec.ts
│ │ │ │ ├── java-okhttp-snippet.ts
│ │ │ │ ├── python-langchain-snippet.spec.ts
│ │ │ │ ├── python-langchain-snippet.ts
│ │ │ │ ├── quarkus-snippet.spec.ts
│ │ │ │ └── quarkus-snippet.ts
│ │ │ ├── models/
│ │ │ │ ├── AIConfig.spec.ts
│ │ │ │ ├── AIConfig.ts
│ │ │ │ ├── ApplicationOptions.ts
│ │ │ │ ├── HuggingFaceModelHandler.spec.ts
│ │ │ │ ├── HuggingFaceModelHandler.ts
│ │ │ │ ├── ModelHandler.ts
│ │ │ │ ├── TaskRunner.ts
│ │ │ │ ├── URLModelHandler.ts
│ │ │ │ └── baseEvent.ts
│ │ │ ├── registries/
│ │ │ │ ├── ApplicationRegistry.ts
│ │ │ │ ├── CancellationTokenRegistry.spec.ts
│ │ │ │ ├── CancellationTokenRegistry.ts
│ │ │ │ ├── ConfigurationRegistry.spec.ts
│ │ │ │ ├── ConfigurationRegistry.ts
│ │ │ │ ├── ContainerRegistry.spec.ts
│ │ │ │ ├── ContainerRegistry.ts
│ │ │ │ ├── ConversationRegistry.ts
│ │ │ │ ├── InferenceProviderRegistry.ts
│ │ │ │ ├── LocalRepositoryRegistry.spec.ts
│ │ │ │ ├── LocalRepositoryRegistry.ts
│ │ │ │ ├── ModelHandlerRegistry.ts
│ │ │ │ ├── NavigationRegistry.spec.ts
│ │ │ │ ├── NavigationRegistry.ts
│ │ │ │ ├── TaskRegistry.spec.ts
│ │ │ │ └── TaskRegistry.ts
│ │ │ ├── studio-api-impl.spec.ts
│ │ │ ├── studio-api-impl.ts
│ │ │ ├── studio.spec.ts
│ │ │ ├── studio.ts
│ │ │ ├── templates/
│ │ │ │ ├── java-okhttp.mustache
│ │ │ │ ├── python-langchain.mustache
│ │ │ │ └── quarkus-langchain4j.mustache
│ │ │ ├── tests/
│ │ │ │ ├── ai-test.json
│ │ │ │ ├── ai-user-test.json
│ │ │ │ └── utils.ts
│ │ │ ├── utils/
│ │ │ │ ├── JsonWatcher.spec.ts
│ │ │ │ ├── JsonWatcher.ts
│ │ │ │ ├── Publisher.spec.ts
│ │ │ │ ├── Publisher.ts
│ │ │ │ ├── RecipeConstants.ts
│ │ │ │ ├── arch.ts
│ │ │ │ ├── catalogUtils.spec.ts
│ │ │ │ ├── catalogUtils.ts
│ │ │ │ ├── downloader.ts
│ │ │ │ ├── imagesUtils.spec.ts
│ │ │ │ ├── imagesUtils.ts
│ │ │ │ ├── inferenceUtils.spec.ts
│ │ │ │ ├── inferenceUtils.ts
│ │ │ │ ├── mcpUtils.ts
│ │ │ │ ├── modelsUtils.spec.ts
│ │ │ │ ├── modelsUtils.ts
│ │ │ │ ├── pathUtils.ts
│ │ │ │ ├── podman.spec.ts
│ │ │ │ ├── podman.ts
│ │ │ │ ├── podsUtils.ts
│ │ │ │ ├── ports.ts
│ │ │ │ ├── randomUtils.ts
│ │ │ │ ├── sha.spec.ts
│ │ │ │ ├── sha.ts
│ │ │ │ ├── uploader.spec.ts
│ │ │ │ ├── uploader.ts
│ │ │ │ ├── urldownloader.spec.ts
│ │ │ │ ├── urldownloader.ts
│ │ │ │ └── utils.ts
│ │ │ ├── webviewUtils.spec.ts
│ │ │ ├── webviewUtils.ts
│ │ │ └── workers/
│ │ │ ├── IWorker.ts
│ │ │ ├── WindowsWorker.ts
│ │ │ ├── provider/
│ │ │ │ ├── InferenceProvider.spec.ts
│ │ │ │ ├── InferenceProvider.ts
│ │ │ │ ├── LlamaCppPython.spec.ts
│ │ │ │ ├── LlamaCppPython.ts
│ │ │ │ ├── OpenVINO.spec.ts
│ │ │ │ ├── OpenVINO.ts
│ │ │ │ ├── WhisperCpp.spec.ts
│ │ │ │ └── WhisperCpp.ts
│ │ │ └── uploader/
│ │ │ ├── UploaderOptions.ts
│ │ │ ├── WSLUploader.spec.ts
│ │ │ └── WSLUploader.ts
│ │ ├── tsconfig.json
│ │ ├── vite.config.js
│ │ └── vitest.config.js
│ ├── frontend/
│ │ ├── index.html
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── App.spec.ts
│ │ │ ├── App.svelte
│ │ │ ├── Route.svelte
│ │ │ ├── app.css
│ │ │ ├── index.html
│ │ │ ├── lib/
│ │ │ │ ├── ApplicationActions.spec.ts
│ │ │ │ ├── ApplicationActions.svelte
│ │ │ │ ├── Badge.spec.ts
│ │ │ │ ├── Badge.svelte
│ │ │ │ ├── Card.svelte
│ │ │ │ ├── ContentDetailsLayout.spec.ts
│ │ │ │ ├── ContentDetailsLayout.svelte
│ │ │ │ ├── ContentDetailsLayoutTest.svelte
│ │ │ │ ├── ExpandableMessage.svelte
│ │ │ │ ├── FlatMenu.svelte
│ │ │ │ ├── Navigation.spec.ts
│ │ │ │ ├── Navigation.svelte
│ │ │ │ ├── RangeInput.svelte
│ │ │ │ ├── RecipeCard.spec.ts
│ │ │ │ ├── RecipeCard.svelte
│ │ │ │ ├── RecipeCardTags.spec.ts
│ │ │ │ ├── RecipeCardTags.svelte
│ │ │ │ ├── RecipeCardTags.ts
│ │ │ │ ├── RecipeDetails.spec.ts
│ │ │ │ ├── RecipeDetails.svelte
│ │ │ │ ├── RecipeStatus.spec.ts
│ │ │ │ ├── RecipeStatus.svelte
│ │ │ │ ├── RecipesCard.spec.ts
│ │ │ │ ├── RecipesCard.svelte
│ │ │ │ ├── button/
│ │ │ │ │ ├── CopyButton.spec.ts
│ │ │ │ │ ├── CopyButton.svelte
│ │ │ │ │ └── ListItemButtonIcon.svelte
│ │ │ │ ├── conversation/
│ │ │ │ │ ├── ChatMessage.svelte
│ │ │ │ │ ├── ConversationActions.svelte
│ │ │ │ │ ├── ElapsedTime.svelte
│ │ │ │ │ ├── SystemPromptBanner.spec.ts
│ │ │ │ │ ├── SystemPromptBanner.svelte
│ │ │ │ │ ├── ToolCallMessage.spec.ts
│ │ │ │ │ └── ToolCallMessage.svelte
│ │ │ │ ├── icons/
│ │ │ │ │ ├── InstructLabIcon.svelte
│ │ │ │ │ ├── ModelStatusIcon.spec.ts
│ │ │ │ │ ├── ModelStatusIcon.svelte
│ │ │ │ │ ├── ModelWhite.svelte
│ │ │ │ │ ├── PlaygroundWhite.svelte
│ │ │ │ │ └── RemoteModel.svelte
│ │ │ │ ├── images/
│ │ │ │ │ ├── DashboardBanner.svelte
│ │ │ │ │ ├── PodIcon.svelte
│ │ │ │ │ └── VSCodeIcon.svelte
│ │ │ │ ├── instructlab/
│ │ │ │ │ ├── AboutInstructLabDiscoverCard.svelte
│ │ │ │ │ └── AboutInstructLabExploreCard.svelte
│ │ │ │ ├── markdown/
│ │ │ │ │ ├── LinkComponent.svelte
│ │ │ │ │ └── MarkdownRenderer.svelte
│ │ │ │ ├── monaco-editor/
│ │ │ │ │ ├── MonacoEditor.svelte
│ │ │ │ │ └── monaco.ts
│ │ │ │ ├── notification/
│ │ │ │ │ ├── ContainerConnectionStatusInfo.spec.ts
│ │ │ │ │ ├── ContainerConnectionStatusInfo.svelte
│ │ │ │ │ ├── ContainerConnectionWrapper.spec.ts
│ │ │ │ │ ├── ContainerConnectionWrapper.svelte
│ │ │ │ │ ├── GPUEnabledMachine.spec.ts
│ │ │ │ │ ├── GPUEnabledMachine.svelte
│ │ │ │ │ ├── GPUPromotion.spec.ts
│ │ │ │ │ └── GPUPromotion.svelte
│ │ │ │ ├── progress/
│ │ │ │ │ ├── TaskItem.spec.ts
│ │ │ │ │ ├── TaskItem.svelte
│ │ │ │ │ ├── TasksBanner.spec.ts
│ │ │ │ │ ├── TasksBanner.svelte
│ │ │ │ │ ├── TasksProgress.spec.ts
│ │ │ │ │ ├── TasksProgress.svelte
│ │ │ │ │ ├── TrackedTasks.spec.ts
│ │ │ │ │ └── TrackedTasks.svelte
│ │ │ │ ├── select/
│ │ │ │ │ ├── ContainerProviderConnectionSelect.spec.ts
│ │ │ │ │ ├── ContainerProviderConnectionSelect.svelte
│ │ │ │ │ ├── InferenceRuntimeSelect.spec.ts
│ │ │ │ │ ├── InferenceRuntimeSelect.svelte
│ │ │ │ │ ├── ModelSelect.spec.ts
│ │ │ │ │ ├── ModelSelect.svelte
│ │ │ │ │ ├── Select.spec.ts
│ │ │ │ │ └── Select.svelte
│ │ │ │ └── table/
│ │ │ │ ├── application/
│ │ │ │ │ ├── ApplicationTable.spec.ts
│ │ │ │ │ ├── ApplicationTable.svelte
│ │ │ │ │ ├── ColumnActions.svelte
│ │ │ │ │ ├── ColumnAge.svelte
│ │ │ │ │ ├── ColumnModel.spec.ts
│ │ │ │ │ ├── ColumnModel.svelte
│ │ │ │ │ ├── ColumnPod.svelte
│ │ │ │ │ ├── ColumnRecipe.spec.ts
│ │ │ │ │ ├── ColumnRecipe.svelte
│ │ │ │ │ ├── ColumnRuntime.spec.ts
│ │ │ │ │ ├── ColumnRuntime.svelte
│ │ │ │ │ └── ColumnStatus.svelte
│ │ │ │ ├── instructlab/
│ │ │ │ │ ├── InstructlabColumnAge.svelte
│ │ │ │ │ ├── InstructlabColumnModelName.spec.ts
│ │ │ │ │ ├── InstructlabColumnModelName.svelte
│ │ │ │ │ ├── InstructlabColumnName.svelte
│ │ │ │ │ ├── InstructlabColumnRepository.svelte
│ │ │ │ │ ├── InstructlabColumnStatus.svelte
│ │ │ │ │ └── InstructlabColumnTargetModelName.svelte
│ │ │ │ ├── model/
│ │ │ │ │ ├── ModelColumnAction.spec.ts
│ │ │ │ │ ├── ModelColumnActions.svelte
│ │ │ │ │ ├── ModelColumnAge.spec.ts
│ │ │ │ │ ├── ModelColumnAge.svelte
│ │ │ │ │ ├── ModelColumnLabels.svelte
│ │ │ │ │ ├── ModelColumnName.spec.ts
│ │ │ │ │ ├── ModelColumnName.svelte
│ │ │ │ │ ├── ModelColumnRecipeSelection.svelte
│ │ │ │ │ ├── ModelColumnSize.spec.ts
│ │ │ │ │ └── ModelColumnSize.svelte
│ │ │ │ ├── playground/
│ │ │ │ │ ├── ConversationColumnAction.spec.ts
│ │ │ │ │ ├── ConversationColumnAction.svelte
│ │ │ │ │ ├── PlaygroundColumnIcon.svelte
│ │ │ │ │ ├── PlaygroundColumnModel.svelte
│ │ │ │ │ ├── PlaygroundColumnName.svelte
│ │ │ │ │ ├── PlaygroundColumnRuntime.spec.ts
│ │ │ │ │ └── PlaygroundColumnRuntime.svelte
│ │ │ │ └── service/
│ │ │ │ ├── ServiceAction.spec.ts
│ │ │ │ ├── ServiceAction.svelte
│ │ │ │ ├── ServiceColumnModelName.spec.ts
│ │ │ │ ├── ServiceColumnModelName.svelte
│ │ │ │ ├── ServiceColumnName.spec.ts
│ │ │ │ ├── ServiceColumnName.svelte
│ │ │ │ ├── ServiceColumnRuntime.spec.ts
│ │ │ │ ├── ServiceColumnRuntime.svelte
│ │ │ │ ├── ServiceStatus.spec.ts
│ │ │ │ └── ServiceStatus.svelte
│ │ │ ├── main.ts
│ │ │ ├── models/
│ │ │ │ └── IRouterState.ts
│ │ │ ├── pages/
│ │ │ │ ├── Applications.svelte
│ │ │ │ ├── CreateService.spec.ts
│ │ │ │ ├── CreateService.svelte
│ │ │ │ ├── Dashboard.spec.ts
│ │ │ │ ├── Dashboard.svelte
│ │ │ │ ├── ImportModel.spec.ts
│ │ │ │ ├── ImportModel.svelte
│ │ │ │ ├── InferenceServerDetails.spec.ts
│ │ │ │ ├── InferenceServerDetails.svelte
│ │ │ │ ├── InferenceServers.spec.ts
│ │ │ │ ├── InferenceServers.svelte
│ │ │ │ ├── Model.spec.ts
│ │ │ │ ├── Model.svelte
│ │ │ │ ├── Models.spec.ts
│ │ │ │ ├── Models.svelte
│ │ │ │ ├── NewInstructLabSession.spec.ts
│ │ │ │ ├── NewInstructLabSession.svelte
│ │ │ │ ├── Playground.spec.ts
│ │ │ │ ├── Playground.svelte
│ │ │ │ ├── PlaygroundCreate.spec.ts
│ │ │ │ ├── PlaygroundCreate.svelte
│ │ │ │ ├── Playgrounds.spec.ts
│ │ │ │ ├── Playgrounds.svelte
│ │ │ │ ├── Preferences.svelte
│ │ │ │ ├── Recipe.spec.ts
│ │ │ │ ├── Recipe.svelte
│ │ │ │ ├── Recipes.spec.ts
│ │ │ │ ├── Recipes.svelte
│ │ │ │ ├── StartRecipe.spec.ts
│ │ │ │ ├── StartRecipe.svelte
│ │ │ │ ├── TuneSessions.spec.ts
│ │ │ │ ├── TuneSessions.svelte
│ │ │ │ ├── applications.ts
│ │ │ │ ├── instructlab/
│ │ │ │ │ ├── AboutInstructLab.spec.ts
│ │ │ │ │ ├── AboutInstructLab.svelte
│ │ │ │ │ ├── StartInstructLabContainer.spec.ts
│ │ │ │ │ └── StartInstructLabContainer.svelte
│ │ │ │ ├── llama-stack/
│ │ │ │ │ ├── StartLlamaStackContainer.spec.ts
│ │ │ │ │ └── StartLlamaStackContainer.svelte
│ │ │ │ └── server-information/
│ │ │ │ ├── LocalServer.spec.ts
│ │ │ │ └── LocalServer.svelte
│ │ │ ├── stores/
│ │ │ │ ├── application-states.ts
│ │ │ │ ├── catalog.ts
│ │ │ │ ├── containerProviderConnections.ts
│ │ │ │ ├── conversations.ts
│ │ │ │ ├── extensionConfiguration.ts
│ │ │ │ ├── inferenceServers.ts
│ │ │ │ ├── instructlabSessions.ts
│ │ │ │ ├── localRepositories.ts
│ │ │ │ ├── modelsInfo.spec.ts
│ │ │ │ ├── modelsInfo.ts
│ │ │ │ ├── rpcReadable.spec.ts
│ │ │ │ ├── rpcReadable.ts
│ │ │ │ ├── snippetLanguages.ts
│ │ │ │ └── tasks.ts
│ │ │ └── utils/
│ │ │ ├── categoriesUtils.ts
│ │ │ ├── client.ts
│ │ │ ├── dimensions.ts
│ │ │ ├── fileUtils.ts
│ │ │ ├── localRepositoriesUtils.ts
│ │ │ ├── printers.ts
│ │ │ ├── taskUtils.ts
│ │ │ └── versionControlUtils.ts
│ │ ├── tailwind.config.cjs
│ │ ├── tsconfig.json
│ │ └── vite.config.js
│ └── shared/
│ ├── __mocks__/
│ │ └── @podman-desktop/
│ │ └── api.js
│ ├── src/
│ │ ├── InstructlabAPI.ts
│ │ ├── LlamaStackAPI.ts
│ │ ├── Messages.ts
│ │ ├── StudioAPI.ts
│ │ ├── messages/
│ │ │ ├── MessageProxy.spec.ts
│ │ │ └── MessageProxy.ts
│ │ ├── models/
│ │ │ ├── FilterRecipesResult.ts
│ │ │ ├── IApplicationCatalog.ts
│ │ │ ├── IApplicationState.ts
│ │ │ ├── ICategory.ts
│ │ │ ├── IContainerConnectionInfo.ts
│ │ │ ├── IExtensionConfiguration.ts
│ │ │ ├── IGPUInfo.ts
│ │ │ ├── IInference.spec.ts
│ │ │ ├── IInference.ts
│ │ │ ├── ILocalModelInfo.ts
│ │ │ ├── ILocalRepository.ts
│ │ │ ├── IModelInfo.ts
│ │ │ ├── IModelOptions.ts
│ │ │ ├── IModelResponse.ts
│ │ │ ├── IPlaygroundMessage.ts
│ │ │ ├── IPlaygroundV2.ts
│ │ │ ├── IPodman.ts
│ │ │ ├── IRecipe.ts
│ │ │ ├── IRecipeModelIndex.ts
│ │ │ ├── ITask.ts
│ │ │ ├── InferenceServerConfig.ts
│ │ │ ├── McpSettings.ts
│ │ │ ├── RequestOptions.ts
│ │ │ ├── instructlab/
│ │ │ │ ├── IInstructlabContainerConfiguration.ts
│ │ │ │ ├── IInstructlabContainerInfo.ts
│ │ │ │ └── IInstructlabSession.ts
│ │ │ └── llama-stack/
│ │ │ ├── LlamaStackContainerConfiguration.ts
│ │ │ └── LlamaStackContainerInfo.ts
│ │ └── uri/
│ │ ├── Uri.spec.ts
│ │ └── Uri.ts
│ ├── tsconfig.json
│ ├── vite.config.js
│ └── vitest.config.js
├── pnpm-workspace.yaml
├── tests/
│ ├── playwright/
│ │ ├── package.json
│ │ ├── playwright.config.ts
│ │ ├── src/
│ │ │ ├── ai-lab-extension.spec.ts
│ │ │ ├── model/
│ │ │ │ ├── ai-lab-app-details-page.ts
│ │ │ │ ├── ai-lab-base-page.ts
│ │ │ │ ├── ai-lab-creating-model-service-page.ts
│ │ │ │ ├── ai-lab-dashboard-page.ts
│ │ │ │ ├── ai-lab-local-server-page.ts
│ │ │ │ ├── ai-lab-model-catalog-page.ts
│ │ │ │ ├── ai-lab-model-llamastack-page.ts
│ │ │ │ ├── ai-lab-model-service-page.ts
│ │ │ │ ├── ai-lab-navigation-bar.ts
│ │ │ │ ├── ai-lab-playground-details-page.ts
│ │ │ │ ├── ai-lab-playgrounds-page.ts
│ │ │ │ ├── ai-lab-recipes-catalog-page.ts
│ │ │ │ ├── ai-lab-running-apps-page.ts
│ │ │ │ ├── ai-lab-service-details-page.ts
│ │ │ │ ├── ai-lab-start-recipe-page.ts
│ │ │ │ ├── ai-lab-try-instructlab-page.ts
│ │ │ │ ├── podman-extension-ai-lab-details-page.ts
│ │ │ │ └── preferences-extension-ai-lab-page.ts
│ │ │ └── utils/
│ │ │ ├── aiLabHandler.ts
│ │ │ └── webviewHandler.ts
│ │ └── tsconfig.json
│ └── tmt/
│ ├── plans/
│ │ ├── ai-lab-e2e-plan-default.fmf
│ │ └── ai-lab-e2e-plan-gpu.fmf
│ ├── scripts/
│ │ ├── create-results.sh
│ │ └── install-podman.sh
│ └── tests/
│ ├── e2e-test.fmf
│ ├── instructlab-test.fmf
│ └── smoke-test.fmf
├── tools/
│ └── compute-model-sizes.sh
└── types/
├── additional.d.ts
├── mustache.d.ts
├── podman-desktop-api.d.ts
└── postman-code-generators.d.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
node_modules
================================================
FILE: .editorconfig
================================================
# EditorConfig is awesome: http://EditorConfig.org
# https://github.com/jokeyrhyme/standard-editorconfig
# top-most EditorConfig file
root = true
# defaults
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_size = 2
indent_style = space
[*.md]
trim_trailing_whitespace = false
================================================
FILE: .fmf/version
================================================
1
================================================
FILE: .gitattributes
================================================
* text=auto eol=lf
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug 🐞
description: Report a bug
type: bug
body:
- type: markdown
attributes:
value: |
Before opening a bug report, please search for the behaviour in the existing issues.
---
Thank you for taking the time to file a bug report. To address this bug as fast as possible, we need some information.
- type: textarea
id: bug-description
attributes:
label: Bug description
description: What happened?
validations:
required: true
- type: input
id: os
attributes:
label: Operating system
description: "Which operating system are you on? Please provide the version as well. If you are on a Mac, please specify Apple silicon or Intel."
placeholder: "macOS Ventura 13.4 (Arm), Windows 11"
validations:
required: true
- type: dropdown
id: install
attributes:
label: Installation Method
description: "How did you install AI Lab ?"
options:
- "from `ghcr.io/containers/podman-desktop-extension-ai-lab` container image"
- "from Podman-Desktop extension page"
- "Other"
- type: dropdown
id: version
attributes:
label: Version
description: What version of the software are you running?
options:
- "next (development version)"
- "1.3.x"
- "1.2.x"
- "1.1.x"
- "1.0.x"
validations:
required: true
- type: textarea
id: steps
attributes:
label: Steps to reproduce
description: What steps do we need to take to reproduce this error?
- type: textarea
id: logs
attributes:
label: Relevant log output
description: If applicable, provide relevant log output.
render: shell
- type: textarea
id: additional-context
attributes:
label: Additional context
description: Add any other context or screenshots here.
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
================================================
FILE: .github/ISSUE_TEMPLATE/epic.yml
================================================
name: Epic ⚡
description: A high-level feature
type: epic
body:
- type: markdown
attributes:
value: |
Epics are normally created by the development team to group a set of related features and plan work across multiple sprints.
The features this epic includes are referenced within the text of the epic.
- type: textarea
id: domain
attributes:
label: Epic domain
description: A clear and concise description of the feature area or domain that this epic will address.
placeholder: AI-Lab should support [...]
validations:
required: true
- type: textarea
id: additional-context
attributes:
label: Additional context
description: Add any other context or screenshots here.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature 💡
description: A request, idea, or new functionality
type: feature
body:
- type: markdown
attributes:
value: |
Before opening a feature request, please search for potential existing issues.
---
Thank you for taking the time to file a feature request, we appreciate and value your time to help the project!
- type: textarea
id: problem
attributes:
label: Is your feature request related to a problem? Please describe
description: A clear and concise description of what the problem is.
placeholder: I'm always frustrated when [...]
validations:
required: true
- type: textarea
id: solution
attributes:
label: Describe the solution you'd like
description: A clear and concise description of what you want to happen.
validations:
required: true
- type: textarea
id: alternatives
attributes:
label: Describe alternatives you've considered
description: A clear and concise description of any alternative solutions or features you've considered.
- type: textarea
id: additional-context
attributes:
label: Additional context
description: Add any other context or screenshots here.
================================================
FILE: .github/ISSUE_TEMPLATE/ux-request.yaml
================================================
name: UX Request
description: UX Request Form
type: UX (design spec)
labels: [UX/UI Issue, Graphic design]
body:
- type: markdown
attributes:
value: |
Before opening a UX request, please search for existing issues.
---
- type: textarea
id: UX-description
attributes:
label: UX Description
description: Describe the request
validations:
required: true
- type: dropdown
id: request-type
attributes:
label: Request type
description: "What type of request is this?"
options:
- "A logo design"
- "An icon"
- "An infographic/chart"
- "a template or design for printed materials"
- "Swag design"
- "Graphic design not covered by the above"
- type: dropdown
id: user-experience
attributes:
label: User Experience Request type
description: "What type of request is this?"
options:
- "UX analysis/suggestions for improvement"
- "User research"
- "User testing"
- "Application mockups/designs"
- "Website mockups/designs"
- "Something else UX-related"
- type: textarea
id: Contacts
attributes:
label: Engineering Contact
description: Who is the primary engineer the design team can speak with about this issue?
- type: textarea
id: Deadline
attributes:
label: Deadline for request
description: When do you need this? If this is for an event, please let us know the date of the event and any lead time you need to get materials produced.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
### What does this PR do?
### Screenshot / video of UI
### What issues does this PR fix or reference?
### How to test this PR?
================================================
FILE: .github/dependabot.yml
================================================
# Set update schedule for GitHub Actions
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
open-pull-requests-limit: 10
- package-ecosystem: "npm"
directory: "/"
schedule:
interval: daily
open-pull-requests-limit: 10
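# Group related dependency bumps so each group below lands as a single pull request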
groups:
fortawesome:
applies-to: version-updates
patterns:
- "@fortawesome/*"
ai-sdk:
applies-to: version-updates
patterns:
- "@ai-sdk/mcp"
- "ai"
================================================
FILE: .github/workflows/ai-lab-e2e-nightly-windows.yaml
================================================
#
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: Podman Desktop AI Lab E2E Nightly
run-name: Podman Desktop AI Lab E2E Nightly ${{ github.event_name == 'push' && '[Recipe change]' || '' }}
on:
schedule:
- cron: '0 2 * * *'
push:
paths:
- 'packages/backend/src/assets/ai.json'
workflow_dispatch:
inputs:
podman_desktop_repo_args:
default: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main'
description: 'Podman Desktop repo fork and branch'
type: string
required: true
ext_repo_options:
default: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main'
description: 'Podman Desktop Extension repo, fork and branch'
type: string
required: true
ext_tests_options:
default: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0'
description: 'E2E tests options in format VAR1=xxx,VAR2=true,VAR3=15 etc.'
type: string
required: true
npm_target:
default: 'test:e2e'
description: 'npm target to run tests'
type: string
required: true
podman_version:
default: 'latest'
description: 'Podman version (use "latest" to auto-fetch latest release, or specify version like "v5.6.1")'
type: string
required: true
podman_options:
default: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0'
description: 'Podman machine configuration options, no spaces'
type: string
required: true
env_vars:
default: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true'
description: 'Env. variables passed into the target machine, i.e. VAR1=xxx,VAR2=true... use EXT_TEST_RAG_CHATBOT=1 to run the RAG Chatbot test'
type: string
required: true
pde2e_image_version:
default: 'v0.0.3'
description: 'PDE2E runner, builder, podman image versions'
type: string
required: true
mapt_params:
description: |
**Create instance (leave empty to use repo secrets/variables)**
**Format:** IMAGE=xxx;VERSION_TAG=xxx;CPUS=xxx;MEMORY=xxx;EXCLUDED_REGIONS=xxx
**Example:**
IMAGE=quay.io/redhat-developer/mapt;VERSION_TAG=v0.9.8;CPUS=4;MEMORY=32;EXCLUDED_REGIONS="westindia,centralindia,southindia,australiacentral,australiacentral2,australiaeast,australiasoutheast,southafricanorth,southafricawest"
required: false
type: string
jobs:
windows:
timeout-minutes: 180
name: windows-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
windows-version: ['11']
windows-featurepack: ['25h2-ent']
steps:
- name: Fetch latest Podman version
id: fetch-podman
uses: redhat-actions/podman-install/.github/actions/fetch-latest-podman-version-windows@6b757b792b67ec663765a4f2ca36226e12b2f4cd
with:
version_input: ${{ github.event.inputs.podman_version || 'latest' }}
file_type: 'setup.exe'
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Set the default env. variables
env:
CI: true
DEFAULT_PODMAN_DESKTOP_REPO_ARGS: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main'
DEFAULT_NPM_TARGET: 'test:e2e'
DEFAULT_ENV_VARS: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true'
DEFAULT_PODMAN_OPTIONS: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0'
DEFAULT_EXT_TESTS_OPTIONS: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0'
DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main'
DEFAULT_PDE2E_IMAGE_VERSION: 'v0.0.3'
run: |
echo "NPM_TARGET=${{ github.event.inputs.npm_target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV
echo "ENV_VARS=${{ github.event.inputs.env_vars || env.DEFAULT_ENV_VARS }}" >> $GITHUB_ENV
echo "PODMAN_URL=${{ steps.fetch-podman.outputs.download_url }}" >> $GITHUB_ENV
echo "PDE2E_IMAGE_VERSION=${{ github.event.inputs.pde2e_image_version || env.DEFAULT_PDE2E_IMAGE_VERSION }}" >> $GITHUB_ENV
echo "${{ github.event.inputs.podman_desktop_repo_args || env.DEFAULT_PODMAN_DESKTOP_REPO_ARGS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PD_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ github.event.inputs.ext_tests_options || env.DEFAULT_EXT_TESTS_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ github.event.inputs.podman_options || env.DEFAULT_PODMAN_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PODMAN_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ github.event.inputs.ext_repo_options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
# For mapt_params, use repo variables directly if input is empty
if [ -n "${{ github.event.inputs.mapt_params }}" ]; then
mapt_params="${{ github.event.inputs.mapt_params }}"
else
mapt_params="IMAGE=${{ vars.MAPT_IMAGE }};VERSION_TAG=${{ vars.MAPT_VERSION_TAG }};CPUS=${{ vars.MAPT_CPUS }};MEMORY=${{ vars.MAPT_MEMORY }};EXCLUDED_REGIONS=\"${{ vars.MAPT_EXCLUDED_REGIONS }}\""
fi
echo "$mapt_params" | awk -F ';' '{for (i=1; i<=NF; i++) {split($i, kv, "="); print "MAPT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
- name: Create instance
uses: podman-desktop/e2e/.github/actions/create-instance@213a276952d746324895f63cea0b23083013990f
with:
mapt-image: ${{ env.MAPT_IMAGE || '' }}
mapt-version: ${{ env.MAPT_VERSION_TAG || '' }}
windows-version: ${{ matrix.windows-version }}
windows-featurepack: ${{ matrix.windows-featurepack }}
cpus: ${{ env.MAPT_CPUS || '' }}
memory: ${{ env.MAPT_MEMORY || '' }}
excluded-regions: ${{ env.MAPT_EXCLUDED_REGIONS || '' }}
arm-tenant-id: ${{ secrets.ARM_TENANT_ID }}
arm-subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }}
arm-client-id: ${{ secrets.ARM_CLIENT_ID }}
arm-client-secret: ${{ secrets.ARM_CLIENT_SECRET }}
- name: Check instance system info
uses: podman-desktop/e2e/.github/actions/instance-system-info@3548105f45def129d5e3aaa5a3d922e09ac892d9
- name: Emulate X session
uses: podman-desktop/e2e/.github/actions/emulate-x-session@3548105f45def129d5e3aaa5a3d922e09ac892d9
- name: Download Podman, do not initialize
uses: podman-desktop/e2e/.github/actions/download-podman-nightly@952cafee20ca82b1ce48b29c848bac1c31062245
with:
podman-image-tag: ${{ env.PDE2E_IMAGE_VERSION }}
podman-download-url: ${{ env.PODMAN_URL }}
- name: Build Podman Desktop Electron Inspect Enabled binary
uses: podman-desktop/e2e/.github/actions/build-podman-desktop@0c1f0a035e0949941fd6abf959ab556ceec13f03
with:
fork: ${{ env.PD_FORK }}
branch: ${{ env.PD_BRANCH }}
env-vars: ${{ env.ENV_VARS }}
- name: Run Podman Desktop Playwright E2E tests
uses: podman-desktop/e2e/.github/actions/run-playwright-test@15b800edab941d394b32aaaa3f7961bb7db7ec3a
with:
pde2e-runner-tag: ${{ env.PDE2E_IMAGE_VERSION }}
podman-desktop-path: true
fork-repo: ${{ env.PD_FORK }}
branch-name: ${{ env.PD_BRANCH }}
ext-repo: ${{ env.EXT_REPO }}
ext-fork: ${{ env.EXT_FORK }}
ext-branch: ${{ env.EXT_BRANCH }}
ext-tests: ${{ env.EXT_RUN_TESTS_FROM_EXTENSION }}
npm-target: ${{ env.NPM_TARGET }}
podman-init: ${{ env.PODMAN_INIT }}
podman-start: ${{ env.PODMAN_START }}
rootful: ${{ env.PODMAN_ROOTFUL }}
user-networking: ${{ env.PODMAN_NETWORKING }}
podman-provider: 'wsl'
env-vars: ${{ env.ENV_VARS }}
ci-bot-token: ${{ secrets.PODMAN_DESKTOP_BOT_TOKEN }}
- name: Destroy instance
if: always()
uses: podman-desktop/e2e/.github/actions/destroy-instance@36e440f2ac18193214f4ffa8f7f1c4c0cb8c9446
with:
mapt-image: ${{ env.MAPT_IMAGE }}
mapt-version: ${{ env.MAPT_VERSION_TAG }}
arm-tenant-id: ${{ secrets.ARM_TENANT_ID }}
arm-subscription-id: ${{ secrets.ARM_SUBSCRIPTION_ID }}
arm-client-id: ${{ secrets.ARM_CLIENT_ID }}
arm-client-secret: ${{ secrets.ARM_CLIENT_SECRET }}
- name: Publish Test Report
uses: mikepenz/action-junit-report@v6
if: always()
with:
annotate_only: true
fail_on_failure: true
include_passed: true
detailed_summary: true
require_tests: true
report_paths: '**/*results.xml'
- name: Upload test artifacts
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}
path: |
results/*
!./**/*.gguf
!./**/*.bin
!./**/output/videos/*
!./**/output/traces/*
- name: Upload test videos
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-videos
path: ./**/output/videos/*
- name: Upload test traces
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-traces
path: ./**/output/traces/*
================================================
FILE: .github/workflows/build-next.yaml
================================================
#
# Copyright (C) 2023-2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: CI
on:
push:
branches:
- 'main'
jobs:
build:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6.0.2
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
- uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
- name: Execute pnpm
run: pnpm install
- name: Run Build
run: pnpm build
- name: Login to ghcr.io
run: podman login --username ${{ github.repository_owner }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io
- name: Publish Image
id: publish-image
run: |
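# Publish the extension image under a moving :nightly tag plus an immutable :<commit sha> tag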
IMAGE_NAME=ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab
IMAGE_NIGHTLY=${IMAGE_NAME}:nightly
IMAGE_SHA=${IMAGE_NAME}:${GITHUB_SHA}
podman build -t $IMAGE_NIGHTLY .
podman push $IMAGE_NIGHTLY
podman tag $IMAGE_NIGHTLY $IMAGE_SHA
podman push $IMAGE_SHA
================================================
FILE: .github/workflows/compute-model-sizes.yml
================================================
# This is a basic workflow that is manually triggered
name: Compute model sizes
# Controls when the action will run. Workflow runs when manually triggered using the UI
# or API.
on:
workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "compute"
compute:
# The type of runner that the job will run on
runs-on: ubuntu-latest
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
- uses: actions/checkout@v6.0.2
# Runs a single command using the runner's shell
- name: Compute model size
run: ./tools/compute-model-sizes.sh
================================================
FILE: .github/workflows/e2e-main-tf.yaml
================================================
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: PD AI Lab E2E Nightly Testing Farm
on:
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
inputs:
podman_version:
default: 'latest'
description: 'Podman version to install (e.g., "5.5.2", "5.6.0~rc1"). Use "latest" for stable or "nightly" for the latest development build.'
type: string
required: true
npm_target:
description: npm tests target
type: choice
default: 'e2e'
options:
- e2e
- smoke
- instructlab
plan:
description: plans to run
type: choice
default: 'default'
options:
- default
- gpu
jobs:
pd-ai-lab-e2e-testing-farm:
name: pd-e2e-testing-farm-ci
runs-on: ubuntu-latest
timeout-minutes: 180
strategy:
fail-fast: false
matrix:
fedora-version: ['Fedora-42', 'Fedora-43']
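# On manual dispatch run only the selected plan; on the nightly schedule run both 'default' and 'gpu'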
plan: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.plan != '' && fromJSON(format('["{0}"]', github.event.inputs.plan)) || fromJSON('["default", "gpu"]') }}
steps:
- name: Set the default env. variables
env:
DEFAULT_NPM_TARGET: 'smoke'
DEFAULT_PODMAN_VERSION: 'latest'
DEFAULT_NODE_VERSION: 'v24.11.1'
run: |
echo "NPM_TARGET=${{ github.event.inputs.npm_target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV
echo "PLAN=${{ matrix.plan }}" >> $GITHUB_ENV
echo "PODMAN_VERSION=${{ github.event.inputs.podman_version || env.DEFAULT_PODMAN_VERSION }}" >> $GITHUB_ENV
echo "NODE_VERSION=${{ vars.NODE_VERSION || env.DEFAULT_NODE_VERSION }}" >> $GITHUB_ENV
- name: Run Podman Desktop Playwright E2E tests on Testing Farm CI
id: run-e2e-tf
uses: sclorg/testing-farm-as-github-action@b23f0de29ac969d12411215a983da264b4ced149 #v4.2.0
with:
api_key: ${{ secrets.TF_TOKEN }}
create_github_summary: "false"
compose: ${{ matrix.fedora-version }}
tmt_plan_filter: 'name:/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}'
variables: COMPOSE=${{ matrix.fedora-version }};ARCH=x86_64;PODMAN_VERSION=${{ env.PODMAN_VERSION }};NODE_VERSION=${{ env.NODE_VERSION }}
- name: Extract Testing Farm work ID and base URL
if: always()
run: |
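# Testing Farm stores per-run artifacts under a generated "work-<target>-..." directory;
# recover that ID from the downloaded JUnit results so later steps can build direct artifact URLs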
TF_ARTIFACTS_URL="${{ steps.run-e2e-tf.outputs.test_log_url }}"
TF_DEFAULT_JUNIT_DEFAULT="${TF_ARTIFACTS_URL}/results-junit.xml"
curl -o results-junit.xml "$TF_DEFAULT_JUNIT_DEFAULT"
TF_WORK_ID=$(grep -o 'work-${{ env.NPM_TARGET }}[^/"]*' results-junit.xml | head -1)
echo "TF_WORK_ID=$TF_WORK_ID" >> $GITHUB_ENV
echo "TF_ARTIFACTS_URL=$TF_ARTIFACTS_URL" >> $GITHUB_ENV
- name: Download Playwright JUnit report from Testing Farm
if: always()
run: |
TF_PLAYWRIGHT_JUNIT_URL="${{ env.TF_ARTIFACTS_URL }}/${{ env.TF_WORK_ID }}/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}/execute/data/guest/default-0/tests/tmt/tests/${{ env.NPM_TARGET }}-test-1/data/junit-results.xml"
curl -o junit-playwright-results.xml "$TF_PLAYWRIGHT_JUNIT_URL"
- name: Publish test report to PR
if: always()
uses: mikepenz/action-junit-report@5b7ee5a21e8674b695313d769f3cbdfd5d4d53a4 #v6.0.0
with:
fail_on_failure: true
include_passed: true
detailed_summary: true
annotate_only: true
require_tests: true
report_paths: '**/junit-playwright-results.xml'
- name: Download test artifacts from Testing Farm
if: failure()
run: |
mkdir -p results
TF_TEST_DATA_URL="${{ env.TF_ARTIFACTS_URL }}/${{ env.TF_WORK_ID }}/tests/tmt/plans/ai-lab-e2e-plan-${{ env.PLAN }}/${{ env.NPM_TARGET }}/execute/data/guest/default-0/tests/tmt/tests/${{ env.NPM_TARGET }}-test-1/data"
TF_TRACES_URL="${TF_TEST_DATA_URL}/traces/"
TF_VIDEOS_URL="${TF_TEST_DATA_URL}/videos/"
echo "Downloading traces"
wget \
--recursive \
--no-parent \
--no-host-directories \
--cut-dirs=10 \
--reject "index.html*" \
--directory-prefix=results \
"$TF_TRACES_URL"
echo "Downloading videos"
wget \
--recursive \
--no-parent \
--no-host-directories \
--cut-dirs=10 \
--reject "index.html*" \
--directory-prefix=results \
"$TF_VIDEOS_URL"
- name: Upload test artifacts
if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: ai-lab-testing-farm-artifacts-${{ matrix.fedora-version }}-${{ env.PLAN }}
path: |
results/*
**/junit-playwright-results.xml
================================================
FILE: .github/workflows/e2e-main.yaml
================================================
#
# Copyright (C) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: e2e-tests-main
on:
push:
branches: [main]
schedule:
- cron: '0 2 * * *'
workflow_dispatch:
inputs:
podman_desktop_repo_args:
default: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main'
description: 'Podman Desktop repo fork and branch'
type: string
required: true
ext_repo_options:
default: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main'
description: 'Podman Desktop Extension repo, fork and branch'
type: string
required: true
jobs:
e2e-tests:
name: Run E2E tests ${{ github.event_name == 'schedule' && '[nightly]' || '' }}
runs-on: ubuntu-24.04
steps:
- name: Set default env variables
env:
DEFAULT_PODMAN_DESKTOP_REPO_ARGS: 'REPO=podman-desktop,FORK=podman-desktop,BRANCH=main'
DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main'
run: |
echo "${{ github.event.inputs.podman_desktop_repo_args || env.DEFAULT_PODMAN_DESKTOP_REPO_ARGS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PD_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ github.event.inputs.ext_repo_options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
- uses: actions/checkout@v6.0.2
name: Checkout AI Lab - Workflow Dispatch
if: github.event_name == 'workflow_dispatch'
with:
repository: ${{ env.EXT_FORK }}/${{ env.EXT_REPO }}
ref: ${{ env.EXT_BRANCH }}
path: podman-desktop-extension-ai-lab
- uses: actions/checkout@v6.0.2
name: Checkout AI Lab - Push or Schedule
if: github.event_name == 'push' || github.event_name == 'schedule'
with:
path: podman-desktop-extension-ai-lab
- uses: actions/checkout@v6.0.2
name: Checkout Podman Desktop
with:
repository: ${{ env.PD_FORK }}/${{ env.PD_REPO }}
ref: ${{ env.PD_BRANCH }}
path: podman-desktop
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
package_json_file: ./podman-desktop/package.json
- uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
cache-dependency-path: |
./podman-desktop
./podman-desktop-extension-ai-lab
- name: Update podman
run: |
echo "ubuntu version from kubic repository to install podman we need (v5)"
ubuntu_version='23.10'
echo "Add unstable kubic repo into list of available sources and get the repo key"
sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list"
curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add -
echo "Updating database of packages..."
sudo apt-get update -qq
echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}"
sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1
echo "install criu manually from static location"
curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb
echo "installing/update podman package..."
sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \
sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \
curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \
sudo apt-get update && \
sudo apt-get -y install podman; }
podman version
- name: Revert unprivileged user namespace restrictions in Ubuntu 24.04
run: |
# allow unprivileged user namespace
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
- name: Set cgroup_manager to 'cgroupfs' instead of systemd
run: |
mkdir -p ~/.config/containers
cat << EOT >> ~/.config/containers/containers.conf
[engine]
cgroup_manager="cgroupfs"
EOT
podman info
- name: Execute pnpm
working-directory: ./podman-desktop
run: pnpm install --frozen-lockfile
- name: Build Podman Desktop for E2E tests
working-directory: ./podman-desktop
run: pnpm test:e2e:build
- name: Ensure getting current HEAD version of the test framework
working-directory: ./podman-desktop-extension-ai-lab/tests/playwright
run: pnpm add -D @podman-desktop/tests-playwright@next
- name: Execute pnpm in AI Lab Extension
working-directory: ./podman-desktop-extension-ai-lab
run: pnpm install
- name: Build Image
working-directory: ./podman-desktop-extension-ai-lab
id: build-image
run: |
pnpm build
podman build -t local_ai_lab_image ./
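# Export the built image's filesystem into the Podman Desktop plugins output folder
# so the E2E run starts with the extension already installed (EXTENSION_PREINSTALLED=true below)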
CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "")
mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins
podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/
podman rm -f $CONTAINER_ID
podman rmi -f localhost/local_ai_lab_image:latest
- name: Free up disk space
uses: podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077
- name: Run All E2E tests
working-directory: ./podman-desktop-extension-ai-lab
env:
PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop
EXTENSION_PREINSTALLED: true
run: pnpm test:e2e
- name: Publish Test Report
uses: mikepenz/action-junit-report@v6
if: always()
with:
annotate_only: true
fail_on_failure: true
include_passed: true
detailed_summary: true
require_tests: true
report_paths: '**/*results.xml'
- uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-tests
path: |
./**/tests/**/output/
!./**/*.gguf
!./**/*.bin
!./**/output/videos/*
!./**/output/traces/*
- name: Upload test videos
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-tests-videos
path: ./**/output/videos/*
- name: Upload test traces
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-tests-traces
path: ./**/output/traces/*
================================================
FILE: .github/workflows/llama-stack-playground.yaml
================================================
#
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: llama-stack-playground
on:
workflow_dispatch:
inputs:
version:
description: 'llama-stack tag to use (e.g. main, v0.2.8,...)'
type: string
required: true
jobs:
publish:
name: publish
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 #v5.0.1
with:
repository: meta-llama/llama-stack
ref: ${{ github.event.inputs.version }}
- name: Install qemu dependency
run: |
sudo apt-get update
sudo apt-get install -y qemu-user-static
- name: Build manifest and images
run: |
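# Build a multi-arch (amd64 + arm64) manifest for the llama-stack playground UI image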
podman manifest create quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }}
podman build --platform linux/amd64,linux/arm64 llama_stack/distribution/ui --manifest quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }}
- name: Login to quay.io
run: podman login quay.io --username ${{ secrets.QUAY_USERNAME }} --password ${{ secrets.QUAY_PASSWORD }}
- name: Push manifest and images to quay.io
run: podman manifest push quay.io/podman-ai-lab/llama-stack-playground:${{ github.event.inputs.version }}
================================================
FILE: .github/workflows/pr-check.yaml
================================================
#
# Copyright (C) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: pr-check
on: [pull_request]
jobs:
lint-format-unit:
name: linter, formatters and unit tests / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
os: [windows-2022, ubuntu-22.04, macos-14]
steps:
- uses: actions/checkout@v6.0.2
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
- uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
- name: Execute pnpm
run: pnpm install
- name: Run linter
run: pnpm lint:check
- name: Run formatter
run: pnpm format:check
- name: Run unit tests
run: pnpm test:unit
- name: Run typecheck
run: pnpm typecheck
- name: Run svelte check
run: pnpm svelte:check
# Check we don't have changes in git
- name: Check no changes in git
if: ${{ matrix.os=='ubuntu-22.04'}}
run: |
if ! git diff --exit-code; then
echo "Found changes in git"
exit 1
fi
e2e-pr-check:
name: e2e tests smoke
runs-on: ubuntu-24.04
env:
SKIP_INSTALLATION: true
steps:
- uses: actions/checkout@v6.0.2
with:
path: podman-desktop-extension-ai-lab
# Set up pnpm
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
package_json_file: ./podman-desktop-extension-ai-lab/package.json
# Install Node.js
- uses: actions/setup-node@v6
with:
node-version: 24
# Checkout podman desktop
- uses: actions/checkout@v6.0.2
with:
repository: containers/podman-desktop
ref: main
path: podman-desktop
- name: Update podman
run: |
echo "ubuntu version from kubic repository to install podman we need (v5)"
ubuntu_version='23.10'
echo "Add unstable kubic repo into list of available sources and get the repo key"
sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list"
curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add -
echo "Updating database of packages..."
sudo apt-get update -qq
echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}"
sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1
echo "install criu manually from static location"
curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb
echo "installing/update podman package..."
sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \
sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \
curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \
sudo apt-get update && \
sudo apt-get -y install podman; }
podman version
- name: Revert unprivileged user namespace restrictions in Ubuntu 24.04
run: |
# allow unprivileged user namespace
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
- name: Set cgroup_manager to 'cgroupfs' instead of systemd
run: |
mkdir -p ~/.config/containers
cat << EOT >> ~/.config/containers/containers.conf
[engine]
cgroup_manager="cgroupfs"
EOT
podman info
- name: Install pnpm deps and build Podman Desktop
working-directory: ./podman-desktop
run: |
pnpm install --frozen-lockfile
pnpm test:e2e:build
- name: Ensure getting current HEAD version of the test framework
working-directory: ./podman-desktop-extension-ai-lab/tests/playwright
run: |
# workaround for https://github.com/containers/podman-desktop-extension-bootc/issues/712
version=$(npm view @podman-desktop/tests-playwright@next version)
echo "Version of @podman-desktop/tests-playwright to be used: $version"
jq --arg version "$version" '.devDependencies."@podman-desktop/tests-playwright" = $version' package.json > package.json_tmp && mv package.json_tmp package.json
- name: Execute pnpm in AI Lab Extension
working-directory: ./podman-desktop-extension-ai-lab
run: pnpm install --no-frozen-lockfile
- name: Build Image
working-directory: ./podman-desktop-extension-ai-lab
id: build-image
run: |
pnpm build
podman build -t local_ai_lab_image ./
CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "")
mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins
podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/
podman rm -f $CONTAINER_ID
podman rmi -f localhost/local_ai_lab_image:latest
- name: Free up disk space
uses: podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077
- name: Run E2E Smoke tests
working-directory: ./podman-desktop-extension-ai-lab
env:
PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop
EXTENSION_PREINSTALLED: true
run: pnpm test:e2e:smoke
- name: Publish Test Report
uses: mikepenz/action-junit-report@v6
if: always()
with:
annotate_only: true
fail_on_failure: true
include_passed: true
detailed_summary: true
require_tests: true
report_paths: '**/*results.xml'
- uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-pr-check
path: |
./**/tests/**/output/
!./**/*.gguf
!./**/*.bin
!./**/output/videos/*
!./**/output/traces/*
- name: Upload test videos
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-pr-check-videos
path: ./**/output/videos/*
- name: Upload test traces
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-pr-check-traces
path: ./**/output/traces/*
================================================
FILE: .github/workflows/ramalama.yaml
================================================
#
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: ramalama
on:
schedule:
- cron: '0 2 * * *'
workflow_dispatch:
inputs:
tag:
default: 'latest'
description: 'Ramalama images tag to use'
type: string
required: true
jobs:
e2e-check:
name: e2e tests
runs-on: ubuntu-24.04
env:
SKIP_INSTALLATION: true
steps:
- uses: actions/checkout@v6.0.2
with:
path: podman-desktop-extension-ai-lab
# Set up pnpm
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
package_json_file: ./podman-desktop-extension-ai-lab/package.json
# Install Node.js
- uses: actions/setup-node@v6
with:
node-version: 24
# Checkout podman desktop
- uses: actions/checkout@v6.0.2
with:
repository: podman-desktop/podman-desktop
ref: main
path: podman-desktop
- name: Update podman
run: |
echo "ubuntu version from kubic repository to install podman we need (v5)"
ubuntu_version='23.10'
echo "Add unstable kubic repo into list of available sources and get the repo key"
sudo sh -c "echo 'deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list"
curl -L "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add -
echo "Updating database of packages..."
sudo apt-get update -qq
echo "install necessary dependencies for criu package which is not part of ${ubuntu_version}"
sudo apt-get install -qq libprotobuf32t64 python3-protobuf libnet1
echo "install criu manually from static location"
curl -sLO http://archive.ubuntu.com/ubuntu/pool/universe/c/criu/criu_3.16.1-2_amd64.deb && sudo dpkg -i criu_3.16.1-2_amd64.deb
echo "installing/update podman package..."
sudo apt-get -qq -y install podman || { echo "Start fallback steps for podman nightly installation from a static mirror" && \
sudo sh -c "echo 'deb http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list" && \
curl -L "http://ftp.lysator.liu.se/pub/opensuse/repositories/devel:/kubic:/libcontainers:/unstable/xUbuntu_${ubuntu_version}/Release.key" | sudo apt-key add - && \
sudo apt-get update && \
sudo apt-get -y install podman; }
podman version
- name: Revert unprivileged user namespace restrictions in Ubuntu 24.04
run: |
# allow unprivileged user namespace
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
- name: Set cgroup_manager to 'cgroupfs' instead of systemd
run: |
mkdir -p ~/.config/containers
cat <<EOT >> ~/.config/containers/containers.conf
[engine]
cgroup_manager="cgroupfs"
EOT
podman info
- name: Install pnpm deps and build Podman Desktop
working-directory: ./podman-desktop
run: |
pnpm install --frozen-lockfile
pnpm test:e2e:build
- name: Ensure getting current HEAD version of the test framework
working-directory: ./podman-desktop-extension-ai-lab/tests/playwright
run: |
# workaround for https://github.com/podman-desktop/podman-desktop-extension-bootc/issues/712
version=$(npm view @podman-desktop/tests-playwright@next version)
echo "Version of @podman-desktop/tests-playwright to be used: $version"
jq --arg version "$version" '.devDependencies."@podman-desktop/tests-playwright" = $version' package.json > package.json_tmp && mv package.json_tmp package.json
- name: Execute pnpm in AI Lab Extension
working-directory: ./podman-desktop-extension-ai-lab
run: pnpm install --no-frozen-lockfile
- name: Update ramalama image references in AI Lab Extension
working-directory: ./podman-desktop-extension-ai-lab
run: sed -i -E "s/(@sha256:[0-9a-f]+)/:${{ github.event_name != 'workflow_dispatch' && 'latest' || github.event.inputs.tag }}/g" packages/backend/src/assets/inference-images.json
- name: Build Image
working-directory: ./podman-desktop-extension-ai-lab
id: build-image
run: |
pnpm build
podman build -t local_ai_lab_image ./
CONTAINER_ID=$(podman create localhost/local_ai_lab_image --entrypoint "")
mkdir -p tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins
podman export $CONTAINER_ID | tar -x -C tests/playwright/tests/playwright/output/ai-lab-tests-pd/plugins/
podman rm -f $CONTAINER_ID
podman rmi -f localhost/local_ai_lab_image:latest
- name: Free up disk space
uses: podman-desktop/e2e/.github/actions/disk-cleanup@6a406f8f24bacffc481553266f9ba8a5293f3077
- name: Run E2E tests
working-directory: ./podman-desktop-extension-ai-lab
env:
PODMAN_DESKTOP_ARGS: ${{ github.workspace }}/podman-desktop
EXTENSION_PREINSTALLED: true
run: pnpm test:e2e
- name: Publish Test Report
uses: mikepenz/action-junit-report@v6
if: always()
with:
annotate_only: true
fail_on_failure: true
include_passed: true
detailed_summary: true
require_tests: true
report_paths: '**/*results.xml'
- uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-check
path: |
./**/tests/**/output/
!./**/*.gguf
!./**/*.bin
!./**/output/videos/*
!./**/output/traces/*
- name: Upload test videos
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-check-videos
path: ./**/output/videos/*
- name: Upload test traces
uses: actions/upload-artifact@v7
if: always()
with:
name: e2e-check-traces
path: ./**/output/traces/*
================================================
FILE: .github/workflows/recipe-catalog-change-cleanup.yaml
================================================
name: recipe-catalog-change-cleanup
on:
workflow_run:
workflows: ["recipe-catalog-change-windows-trigger"]
types:
- completed
jobs:
extract-context:
runs-on: ubuntu-24.04
outputs:
extract-context: ${{ steps.prepare-context.outputs.extract-context }}
trigger-template: ${{ steps.prepare-context.outputs.trigger-template }}
steps:
- name: Prepare context
id: prepare-context
env:
WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "Workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}"
echo "Fork owner: ${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}"
echo "Fork repo: ${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}"
echo "Fork branch: ${{ fromJson(env.WORKFLOW_RUN).head_branch }}"
echo "Commit SHA: ${{ fromJson(env.WORKFLOW_RUN).head_sha }}"
echo "Base repo: ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}"
echo "Conclusion: ${{ fromJson(env.WORKFLOW_RUN).conclusion }}"
# Fetch job conclusions using the GitHub CLI
echo "Fetching jobs for workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}"
gh api \
repos/${{ github.repository }}/actions/runs/${{ fromJson(env.WORKFLOW_RUN).id }}/jobs \
--jq '.jobs[] | "\(.name)=\(.conclusion)"' | while read -r line; do
echo "$line" >> $GITHUB_OUTPUT
done
cat $GITHUB_OUTPUT
cleanup:
runs-on: ubuntu-24.04
needs: extract-context
if: ${{ github.event.workflow_run.conclusion == 'skipped' || (github.event.workflow_run.conclusion == 'success' && needs.extract-context.outputs.trigger-template == 'skipped') }}
steps:
- name: Remove skipped or cancelled workflow run
env:
WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "Cleaning up workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}"
gh run delete ${{ fromJson(env.WORKFLOW_RUN).id }} --repo ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}
echo "Workflow run ID ${{ fromJson(env.WORKFLOW_RUN).id }} has been cleaned up."
================================================
FILE: .github/workflows/recipe-catalog-change-template.yaml
================================================
name: Run recipe tests on catalog change
on:
workflow_call:
inputs:
trigger-workflow-run-id:
required: true
type: string
trigger-workflow-fork:
required: true
type: string
trigger-workflow-repo-name:
required: true
type: string
trigger-workflow-branch:
required: true
type: string
trigger-workflow-commit-sha:
required: true
type: string
trigger-workflow-base-repo:
required: true
type: string
pd-fork:
required: false
type: string
pd-branch:
required: false
type: string
pd-env-vars:
required: false
type: string
podman-options:
required: false
type: string
podman-download-url:
required: false
type: string
ext_tests_options:
required: false
type: string
npm-target:
required: false
type: string
pde2e-image-version:
required: false
type: string
mapt_params:
required: false
type: string
jobs:
windows:
name: recipe-catalog-windows-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
windows-version: ['11']
windows-featurepack: ['25h2-ent']
steps:
- name: Add PR check status
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
status_context="catalog-change-windows-matrix-${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}"
echo "status_context=${status_context}" >> "$GITHUB_ENV"
set -xuo
# Status msg
data="{\"state\":\"pending\""
data="${data},\"description\":\"Running recipe tests on catalog change on Windows ${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}\""
data="${data},\"context\":\"$status_context\""
data="${data},\"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}"
# Create status by API call
curl -L -v -X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ env.GH_TOKEN }}" \
https://api.github.com/repos/${{ inputs.trigger-workflow-base-repo }}/statuses/${{ inputs.trigger-workflow-commit-sha }} \
-d "${data}"
- name: Get Podman version used by Podman Desktop
run: |
version=$(curl https://raw.githubusercontent.com/containers/podman-desktop/main/extensions/podman/packages/extension/src/podman5.json | jq -r '.version')
echo "Default Podman Version from Podman Desktop: ${version}"
echo "PD_PODMAN_VERSION=${version}" >> $GITHUB_ENV
- name: Set the default env. variables
env:
DEFAULT_FORK: 'containers'
DEFAULT_BRANCH: 'main'
DEFAULT_NPM_TARGET: 'test:e2e'
DEFAULT_ENV_VARS: 'TEST_PODMAN_MACHINE=true,ELECTRON_ENABLE_INSPECT=true'
DEFAULT_PODMAN_OPTIONS: 'INIT=1,START=1,ROOTFUL=1,NETWORKING=0'
DEFAULT_EXT_TESTS_OPTIONS: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=1,EXT_TEST_GPU_SUPPORT_ENABLED=0'
DEFAULT_EXT_REPO_OPTIONS: 'REPO=podman-desktop-extension-ai-lab,FORK=containers,BRANCH=main'
DEFAULT_PODMAN_VERSION: "${{ env.PD_PODMAN_VERSION || '5.3.2' }}"
DEFAULT_URL: "https://github.com/containers/podman/releases/download/v$DEFAULT_PODMAN_VERSION/podman-$DEFAULT_PODMAN_VERSION-setup.exe"
DEFAULT_PDE2E_IMAGE_VERSION: 'v0.0.3-windows'
DEFAULT_MAPT_PARAMS: "IMAGE=${{ vars.MAPT_IMAGE || 'quay.io/redhat-developer/mapt' }};VERSION_TAG=${{ vars.MAPT_VERSION_TAG || 'v0.9.7' }};CPUS=${{ vars.MAPT_CPUS || '4' }};MEMORY=${{ vars.MAPT_MEMORY || '32' }};EXCLUDED_REGIONS=\"${{ vars.MAPT_EXCLUDED_REGIONS || 'westindia,centralindia,southindia,australiacentral,australiacentral2,australiaeast,australiasoutheast,southafricanorth,southafricawest' }}\""
run: |
echo "FORK=${{ inputs.pd-fork || env.DEFAULT_FORK }}" >> $GITHUB_ENV
echo "BRANCH=${{ inputs.pd-branch || env.DEFAULT_BRANCH }}" >> $GITHUB_ENV
echo "NPM_TARGET=${{ inputs.npm-target || env.DEFAULT_NPM_TARGET }}" >> $GITHUB_ENV
echo "ENV_VARS=${{ inputs.pd-env-vars || env.DEFAULT_ENV_VARS }}" >> $GITHUB_ENV
echo "PODMAN_URL=${{ inputs.podman-download-url || env.DEFAULT_URL }}" >> $GITHUB_ENV
echo "PDE2E_IMAGE_VERSION=${{ inputs.pde2e-image-version || env.DEFAULT_PDE2E_IMAGE_VERSION }}" >> $GITHUB_ENV
if [[ -z "${{ inputs.trigger-workflow-repo-name }}" ]] && [[ -z "${{ inputs.trigger-workflow-fork }}" ]] && [[ -z "${{ inputs.trigger-workflow-branch }}" ]]; then
echo "DEFAULT_EXT_REPO_OPTIONS=REPO=${{ inputs.trigger-workflow-repo-name }},FORK=${{ inputs.trigger-workflow-fork }},BRANCH=${{ inputs.trigger-workflow-branch }}" >> $GITHUB_ENV
fi
echo "${{ github.event.inputs.ext_tests_options || env.DEFAULT_EXT_TESTS_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ env.DEFAULT_PODMAN_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "PODMAN_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ inputs.podman-options || env.DEFAULT_EXT_REPO_OPTIONS }}" | awk -F ',' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "EXT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
echo "${{ github.event.inputs.mapt_params || env.DEFAULT_MAPT_PARAMS }}" | awk -F ';' \
'{for (i=1; i<=NF; i++) {split($i, kv, "="); print "MAPT_"kv[1]"="kv[2]}}' >> $GITHUB_ENV
- name: Create instance
run: |
# Create instance
podman run -d --name windows-create --rm \
-v ${PWD}:/workspace:z \
-e ARM_TENANT_ID=${{ secrets.ARM_TENANT_ID }} \
-e ARM_SUBSCRIPTION_ID=${{ secrets.ARM_SUBSCRIPTION_ID }} \
-e ARM_CLIENT_ID=${{ secrets.ARM_CLIENT_ID }} \
-e ARM_CLIENT_SECRET='${{ secrets.ARM_CLIENT_SECRET }}' \
--user 0 \
${{ env.MAPT_IMAGE }}:${{ env.MAPT_VERSION_TAG }} azure \
windows create \
--project-name 'windows-desktop' \
--backed-url 'file:///workspace' \
--conn-details-output '/workspace' \
--windows-version '${{ matrix.windows-version }}' \
--windows-featurepack '${{ matrix.windows-featurepack }}' \
--cpus ${{ env.MAPT_CPUS }} \
--memory ${{ env.MAPT_MEMORY }} \
--nested-virt \
--tags project=podman-desktop \
--spot-excluded-regions ${{ env.MAPT_EXCLUDED_REGIONS }} \
--spot
# Check logs
podman logs -f windows-create
- name: Check instance system info
run: |
ssh -i id_rsa \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-o ServerAliveCountMax=1200 \
$(cat username)@$(cat host) "systeminfo"
- name: Emulate X session
run: |
# use fake rdp to emulate an active x session
podman run -d --name x-session \
-e RDP_HOST=$(cat host) \
-e RDP_USER=$(cat username) \
-e RDP_PASSWORD=$(cat userpassword) \
quay.io/rhqp/frdp:v0.0.1
# Wait until the x session has been created
podman wait --condition running x-session
# Check logs for the x session
podman logs x-session
- name: Download Podman, do not initialize
run: |
podman run --rm -d --name pde2e-podman-run \
-e TARGET_HOST=$(cat host) \
-e TARGET_HOST_USERNAME=$(cat username) \
-e TARGET_HOST_KEY_PATH=/data/id_rsa \
-e TARGET_FOLDER=pd-e2e \
-e TARGET_CLEANUP=false \
-e TARGET_RESULTS=results \
-e OUTPUT_FOLDER=/data \
-e DEBUG=true \
-v $PWD:/data:z \
quay.io/odockal/pde2e-podman:${{ env.PDE2E_IMAGE_VERSION }} \
pd-e2e/podman.ps1 \
-downloadUrl ${{ env.PODMAN_URL }} \
-targetFolder pd-e2e \
-resultsFolder results \
-initialize 0 \
-rootful 0 \
-start 0 \
-installWSL 0
# check logs
podman logs -f pde2e-podman-run
- name: Build Podman Desktop Electron Inspect Enabled binary
run: |
podman run --rm -d --name pde2e-builder-run \
-e TARGET_HOST=$(cat host) \
-e TARGET_HOST_USERNAME=$(cat username) \
-e TARGET_HOST_KEY_PATH=/data/id_rsa \
-e TARGET_FOLDER=pd-e2e \
-e TARGET_CLEANUP=false \
-e TARGET_RESULTS=results \
-e OUTPUT_FOLDER=/data \
-e DEBUG=true \
-v $PWD:/data:z \
quay.io/odockal/pde2e-builder:${{ env.PDE2E_IMAGE_VERSION }} \
pd-e2e/builder.ps1 \
-targetFolder pd-e2e \
-resultsFolder results \
-fork ${{ env.FORK }} \
-branch ${{ env.BRANCH }} \
-envVars ${{ env.ENV_VARS }}
# check logs
podman logs -f pde2e-builder-run
- name: Run Podman Desktop Playwright E2E tests
run: |
podman run -d --name pde2e-runner-run \
-e TARGET_HOST=$(cat host) \
-e TARGET_HOST_USERNAME=$(cat username) \
-e TARGET_HOST_KEY_PATH=/data/id_rsa \
-e TARGET_FOLDER=pd-e2e \
-e TARGET_RESULTS=results \
-e OUTPUT_FOLDER=/data \
-e DEBUG=true \
-v $PWD:/data:z \
quay.io/odockal/pde2e-runner:${{ env.PDE2E_IMAGE_VERSION }} \
pd-e2e/runner.ps1 \
-targetFolder pd-e2e \
-resultsFolder results \
-podmanPath $(cat results/podman-location.log) \
-pdPath "$(cat results/pde2e-binary-path.log | tr '\n' " ")" \
-fork ${{ env.FORK }} \
-branch ${{ env.BRANCH }} \
-extRepo ${{ env.EXT_REPO }} \
-extFork ${{ env.EXT_FORK }} \
-extBranch ${{ env.EXT_BRANCH }} \
-extTests ${{ env.EXT_RUN_TESTS_FROM_EXTENSION }} \
-npmTarget ${{ env.NPM_TARGET }} \
-initialize ${{ env.PODMAN_INIT }} \
-rootful ${{ env.PODMAN_ROOTFUL }} \
-start ${{ env.PODMAN_START }} \
-userNetworking ${{ env.PODMAN_NETWORKING }} \
-envVars ${{ env.ENV_VARS }} \
-runAsAdmin ${{ env.EXT_RUN_TESTS_AS_ADMIN }}
# check logs
podman logs -f pde2e-runner-run
- name: Publish Test Report
id: test-report
uses: mikepenz/action-junit-report@v6
if: always() # always run even if the previous step fails
with:
annotate_only: true
fail_on_failure: true
include_passed: true
detailed_summary: true
require_tests: true
report_paths: '**/*results.xml'
- name: Update status of the PR check
if: always()
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -xuo
# Status msg
data="{\"state\":\"success\""
if [[ ${{ steps.test-report.outcome }} != "success" ]]; then
data="{\"state\":\"failure\""
fi
data="${data},\"description\":\"Finished recipe tests on catalog change on Windows ${{ matrix.windows-version }}-${{ matrix.windows-featurepack }}\""
data="${data},\"context\":\"${{ env.status_context }}\""
data="${data},\"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}"
# Create status by API call
curl -L -v -X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ env.GH_TOKEN }}" \
https://api.github.com/repos/${{ inputs.trigger-workflow-base-repo }}/statuses/${{ inputs.trigger-workflow-commit-sha }} \
-d "${data}"
- name: Destroy instance
if: always()
run: |
# Destroy instance
podman run -d --name windows-destroy --rm \
-v ${PWD}:/workspace:z \
-e ARM_TENANT_ID=${{ secrets.ARM_TENANT_ID }} \
-e ARM_SUBSCRIPTION_ID=${{ secrets.ARM_SUBSCRIPTION_ID }} \
-e ARM_CLIENT_ID=${{ secrets.ARM_CLIENT_ID }} \
-e ARM_CLIENT_SECRET='${{ secrets.ARM_CLIENT_SECRET }}' \
--user 0 \
${{ env.MAPT_IMAGE }}:${{ env.MAPT_VERSION_TAG }} azure \
windows destroy \
--project-name 'windows-desktop' \
--backed-url 'file:///workspace'
# Check logs
podman logs -f windows-destroy
- name: Upload test artifacts
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}
path: |
results/*
!./**/*.gguf
!./**/*.bin
!./**/output/videos/*
!./**/output/traces/*
- name: Upload test videos
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-videos
path: ./**/output/videos/*
- name: Upload test traces
uses: actions/upload-artifact@v7
if: always()
with:
name: results-e2e-${{ matrix.windows-version }}${{ matrix.windows-featurepack }}-traces
path: ./**/output/traces/*
================================================
FILE: .github/workflows/recipe-catalog-change-trigger.yaml
================================================
name: recipe-catalog-change-windows-trigger
on:
workflow_run:
workflows: ["pr-check"]
types:
- completed
jobs:
extract-context:
runs-on: ubuntu-24.04
if: ${{ github.event.workflow_run.conclusion == 'success' }}
outputs:
workflow-run-id: ${{ steps.parse-event.outputs.workflow-run-id }}
fork-owner: ${{ steps.parse-event.outputs.fork-owner }}
fork-repo: ${{ steps.parse-event.outputs.fork-repo }}
fork-branch: ${{ steps.parse-event.outputs.fork-branch }}
commit-sha: ${{ steps.parse-event.outputs.commit-sha }}
base-repo: ${{ steps.parse-event.outputs.base-repo }}
changes-detected: ${{ steps.parse-event.outputs.changes-detected }}
steps:
- name: Parse event data
id: parse-event
env:
WORKFLOW_RUN: ${{ toJson(github.event.workflow_run) }}
GH_TOKEN: ${{ github.token }}
run: |
echo "Workflow run ID: ${{ fromJson(env.WORKFLOW_RUN).id }}"
echo "workflow-run-id=${{ fromJson(env.WORKFLOW_RUN).id }}" >> $GITHUB_OUTPUT
echo "Fork owner: ${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}"
echo "fork-owner=${{ fromJson(env.WORKFLOW_RUN).head_repository.owner.login }}" >> $GITHUB_OUTPUT
echo "Fork repo: ${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}"
echo "fork-repo=${{ fromJson(env.WORKFLOW_RUN).head_repository.name }}" >> $GITHUB_OUTPUT
echo "Fork branch: ${{ fromJson(env.WORKFLOW_RUN).head_branch }}"
echo "fork-branch=${{ fromJson(env.WORKFLOW_RUN).head_branch }}" >> $GITHUB_OUTPUT
echo "Commit SHA: ${{ fromJson(env.WORKFLOW_RUN).head_sha }}"
echo "commit-sha=${{ fromJson(env.WORKFLOW_RUN).head_sha }}" >> $GITHUB_OUTPUT
echo "Base repo: ${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}"
echo "base-repo=${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" >> $GITHUB_OUTPUT
git clone "https://www.github.com/${{ fromJson(env.WORKFLOW_RUN).repository.full_name }}" "${{ fromJson(env.WORKFLOW_RUN).repository.name }}" --depth 1
cd "${{ fromJson(env.WORKFLOW_RUN).repository.name }}"
git remote add upstream "https://www.github.com/${{ fromJson(env.WORKFLOW_RUN).head_repository.full_name }}"
git fetch upstream
git diff --name-only upstream/${{ fromJson(env.WORKFLOW_RUN).head_branch }} HEAD > changes.txt
if grep -qe 'packages/backend/src/assets/ai.json' changes.txt; then
echo "Changes detected in ai.json"
echo "changes-detected=true" >> $GITHUB_OUTPUT
else
echo "No changes detected in ai.json"
echo "changes-detected=false" >> $GITHUB_OUTPUT
fi
trigger-template:
needs: extract-context
uses: containers/podman-desktop-extension-ai-lab/.github/workflows/recipe-catalog-change-template.yaml@main
if: ${{ needs.extract-context.outputs.changes-detected == 'true' }}
strategy:
fail-fast: false
with:
trigger-workflow-run-id: ${{ needs.extract-context.outputs.workflow-run-id }}
trigger-workflow-fork: ${{ needs.extract-context.outputs.fork-owner }}
trigger-workflow-repo-name: ${{ needs.extract-context.outputs.fork-repo }}
trigger-workflow-branch: ${{ needs.extract-context.outputs.fork-branch }}
trigger-workflow-commit-sha: ${{ needs.extract-context.outputs.commit-sha }}
trigger-workflow-base-repo: ${{ needs.extract-context.outputs.base-repo }}
ext_tests_options: 'EXT_RUN_TESTS_FROM_EXTENSION=1,EXT_RUN_TESTS_AS_ADMIN=0,EXT_TEST_GPU_SUPPORT_ENABLED=0'
secrets: inherit
================================================
FILE: .github/workflows/release.yaml
================================================
#
# Copyright (C) 2024-2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
name: release
on:
workflow_dispatch:
inputs:
version:
description: 'Version to release'
required: true
branch:
description: 'Branch to use for the release'
required: true
default: main
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
jobs:
tag:
name: Tagging
runs-on: ubuntu-24.04
outputs:
githubTag: ${{ steps.TAG_UTIL.outputs.githubTag}}
extVersion: ${{ steps.TAG_UTIL.outputs.extVersion}}
releaseId: ${{ steps.create_release.outputs.id}}
steps:
- uses: actions/checkout@v6.0.2
with:
ref: ${{ github.event.inputs.branch }}
- name: Generate tag utilities
id: TAG_UTIL
run: |
TAG_PATTERN=${{ github.event.inputs.version }}
echo "githubTag=v$TAG_PATTERN" >> ${GITHUB_OUTPUT}
echo "extVersion=$TAG_PATTERN" >> ${GITHUB_OUTPUT}
- name: tag
run: |
git config --local user.name ${{ github.actor }}
# Add the new version in package.json file
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" packages/backend/package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" packages/frontend/package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${{ steps.TAG_UTIL.outputs.extVersion }}\",#g" tests/playwright/package.json
git add package.json
git add packages/backend/package.json
git add packages/frontend/package.json
git add tests/playwright/package.json
# commit the changes
git commit -m "chore: 🥁 tagging ${{ steps.TAG_UTIL.outputs.githubTag }} 🥳"
echo "Tagging with ${{ steps.TAG_UTIL.outputs.githubTag }}"
git tag ${{ steps.TAG_UTIL.outputs.githubTag }}
git push origin ${{ steps.TAG_UTIL.outputs.githubTag }}
- name: Create Release
id: create_release
uses: ncipollo/release-action@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag: ${{ steps.TAG_UTIL.outputs.githubTag }}
name: ${{ steps.TAG_UTIL.outputs.githubTag }}
draft: true
prerelease: false
- name: Create the PR to bump the version in the main branch (only if we're tagging from main branch)
if: ${{ github.event.inputs.branch == 'main' }}
run: |
git config --local user.name ${{ github.actor }}
CURRENT_VERSION=$(echo "${{ steps.TAG_UTIL.outputs.extVersion }}")
tmp=${CURRENT_VERSION%.*}
minor=${tmp#*.}
bumpedVersion=${CURRENT_VERSION%%.*}.$((minor + 1)).0
bumpedBranchName="bump-to-${bumpedVersion}"
git checkout -b "${bumpedBranchName}"
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" packages/backend/package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" packages/frontend/package.json
sed -i "s#version\":\ \"\(.*\)\",#version\":\ \"${bumpedVersion}-next\",#g" tests/playwright/package.json
git add package.json
git add packages/backend/package.json
git add packages/frontend/package.json
git add tests/playwright/package.json
git commit -s --amend -m "chore: bump version to ${bumpedVersion}"
git push origin "${bumpedBranchName}"
echo -e "📢 Bump version to ${bumpedVersion}\n\n${{ steps.TAG_UTIL.outputs.extVersion }} has been released.\n\n Time to switch to the new ${bumpedVersion} version 🥳" > /tmp/pr-title
pullRequestUrl=$(gh pr create --title "chore: 📢 Bump version to ${bumpedVersion}" --body-file /tmp/pr-title --head "${bumpedBranchName}" --base "main")
echo "📢 Pull request created: ${pullRequestUrl}"
echo "➡️ Flag the PR as being ready for review"
gh pr ready "${pullRequestUrl}"
echo "🔅 Mark the PR as being ok to be merged automatically"
gh pr merge "${pullRequestUrl}" --auto --rebase
git checkout ${{ steps.TAG_UTIL.outputs.githubTag }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build:
needs: [tag]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6.0.2
with:
ref: ${{ needs.tag.outputs.githubTag }}
- uses: pnpm/action-setup@v5
name: Install pnpm
with:
run_install: false
- uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
- name: Execute pnpm
run: pnpm install
- name: Run Build
run: pnpm build
- name: Login to ghcr.io
run: podman login --username ${{ github.repository_owner }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io
- name: Build Image
id: build-image
run: |
podman build -t ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }} .
podman push ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }}
podman tag ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:${{ needs.tag.outputs.extVersion }} ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:latest
podman push ghcr.io/${{ github.repository_owner }}/podman-desktop-extension-ai-lab:latest
release:
needs: [tag, build]
name: Release
runs-on: ubuntu-24.04
steps:
- name: id
run: echo the release id is ${{ needs.tag.outputs.releaseId}}
- name: Publish release
uses: StuYarrow/publish-release@v1.1.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
id: ${{ needs.tag.outputs.releaseId}}
================================================
FILE: .github/workflows/update-ramalama-references.sh
================================================
#!/usr/bin/env bash
#
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Script to update ramalama image references in inference-images.json
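# Usage: update-ramalama-references.sh <tag>
# Exits 0 when the digests are already up to date, 10 when inference-images.json was updated.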
set -euo pipefail
JSON_PATH="packages/backend/src/assets/inference-images.json"
TMP_JSON="${JSON_PATH}.tmp"
TAG=$1
# Images and their keys in the JSON
IMAGES=(
"whispercpp:ramalama/ramalama-whisper-server:default"
"llamacpp:ramalama/ramalama-llama-server:default"
"llamacpp:ramalama/cuda-llama-server:cuda"
"openvino:ramalama/openvino:default"
)
cp "$JSON_PATH" "$TMP_JSON"
for entry in "${IMAGES[@]}"; do
IFS=":" read -r key image jsonkey <<< "$entry"
digest=$(curl -s "https://quay.io/v2/$image/manifests/$TAG" -H 'Accept: application/vnd.oci.image.index.v1+json' --head | grep -i Docker-Content-Digest | awk -e '{ print $2 }' | tr -d '\r')
# Update the JSON file with the new digest
jq --arg img "quay.io/$image" --arg dig "$digest" --arg key "$key" --arg jsonkey "$jsonkey" \
'(.[$key][$jsonkey]) = ($img + "@" + $dig)' \
"$TMP_JSON" > "$TMP_JSON.new" && mv "$TMP_JSON.new" "$TMP_JSON"
done
# Compare and update if changed
if cmp -s "$JSON_PATH" "$TMP_JSON"; then
echo "No update needed: digests are up to date."
rm "$TMP_JSON"
exit 0
else
mv "$TMP_JSON" "$JSON_PATH"
echo "Updated inference-images.json with latest digests."
exit 10
fi
================================================
FILE: .github/workflows/update-ramalama-references.yaml
================================================
#
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# This workflow automatically updates ramalama image digests in inference-images.json
# and creates a pull request with the changes.
name: update-ramalama-references
on:
schedule:
- cron: '0 3 * * *' # Runs daily at 03:00 UTC
workflow_dispatch:
permissions:
contents: write
jobs:
update-references:
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 # v5.0.1
- name: Get latest ramalama version
id: get_ramalama_version
run: |
RAMALAMA_VERSION=$(curl -s https://quay.io/v2/ramalama/ramalama-llama-server/tags/list -s | jq .tags[] | grep -E '^"[0-9]+\.[0-9]+\.[0-9]+"$' | sort -V | tail -n 1 | tr -d '"')
echo "RAMALAMA_VERSION=${RAMALAMA_VERSION}" >> $GITHUB_OUTPUT
- name: Check if PR already exists
id: pr_exists
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
with:
script: |
const branch = `update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}`;
const { data: pulls } = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
head: `${context.repo.owner}:${branch}`,
state: 'open',
});
if (pulls.length > 0) {
core.setOutput('exists', 'true');
} else {
core.setOutput('exists', 'false');
}
- name: Update ramalama image references in inference-images.json
id: update_digests
if: steps.pr_exists.outputs.exists == 'false'
run: |
bash .github/workflows/update-ramalama-references.sh "${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}"
continue-on-error: true
- name: Commit changes
if: steps.pr_exists.outputs.exists == 'false' && steps.update_digests.outcome == 'failure'
run: |
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git config --global user.name "github-actions[bot]"
git checkout -b "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}"
git add packages/backend/src/assets/inference-images.json
git commit -m "chore: update ramalama image references ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}"
git push origin "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}"
- name: Create Pull Request
if: steps.pr_exists.outputs.exists == 'false' && steps.update_digests.outcome == 'failure'
run: |
echo -e "update ramalama image references to ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" > /tmp/pr-title
pullRequestUrl=$(gh pr create --title "chore: update ramalama image references to ${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" --body-file /tmp/pr-title --head "update-ramalama-references-${{ steps.get_ramalama_version.outputs.RAMALAMA_VERSION }}" --base "main")
echo "📢 Pull request created: ${pullRequestUrl}"
echo "➡️ Flag the PR as being ready for review"
gh pr ready "${pullRequestUrl}"
env:
GITHUB_TOKEN: ${{ secrets.PODMAN_DESKTOP_BOT_TOKEN }}
================================================
FILE: .gitignore
================================================
node_modules
.DS_Store
dist
.eslintcache
**/coverage
.idea
output
================================================
FILE: .husky/commit-msg
================================================
#!/bin/sh
#
# Copyright (C) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
set -u
# avoid [[ which is not POSIX sh.
if test "$#" != 1; then
echo "$0 requires an argument."
exit 1
fi
if test ! -f "$1"; then
echo "file does not exist: $1"
exit 1
fi
pnpm commitlint --edit "$1"
SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
grep -qs "^$SOB" "$1" || echo "$SOB" >>"$1"
# Catches duplicate Signed-off-by lines.
test "" = "$(grep '^Signed-off-by: ' "$1" |
sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || {
echo >&2 Duplicate Signed-off-by lines.
exit 1
}
================================================
FILE: .husky/pre-commit
================================================
#
# Copyright (C) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
pnpm lint-staged
================================================
FILE: .npmrc
================================================
node-linker=hoisted
================================================
FILE: .prettierrc
================================================
{
"svelteSortOrder" : "options-styles-scripts-markup",
"svelteStrictMode": true,
"svelteAllowShorthand": false,
"svelteIndentScriptAndStyle": false,
"bracketSameLine": true,
"singleQuote": true,
"arrowParens": "avoid",
"printWidth": 120,
"trailingComma": "all",
"plugins": ["prettier-plugin-svelte"]
}
================================================
FILE: .vscode/settings.json
================================================
{
"typescript.preferences.importModuleSpecifier": "non-relative"
}
================================================
FILE: CODE-OF-CONDUCT.md
================================================
Podman Desktop Extension AI Lab Project Community Code of Conduct
The Podman Desktop Extension AI Lab Project follows the [Containers Community Code of Conduct](https://github.com/containers/common/blob/main/CODE-OF-CONDUCT.md).
================================================
FILE: Containerfile
================================================
#
# Copyright (C) 2024 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
FROM scratch as builder
COPY packages/backend/dist/ /extension/dist
COPY packages/backend/package.json /extension/
COPY packages/backend/media/ /extension/media
COPY LICENSE /extension/
COPY packages/backend/icon.png /extension/
COPY packages/backend/brain.woff2 /extension/
COPY README.md /extension/
COPY api/openapi.yaml /extension/api/
FROM scratch
LABEL org.opencontainers.image.title="AI Lab" \
org.opencontainers.image.description="AI Lab" \
org.opencontainers.image.vendor="Red Hat" \
io.podman-desktop.api.version=">= 1.8.0"
COPY --from=builder /extension /extension
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MIGRATION.md
================================================
# Migration guide
## ℹ️ ApplicationCatalog
Before **Podman AI Lab** `v1.2.0` the [user-catalog](./PACKAGING-GUIDE.md#applicationcatalog) was not versioned.
Starting from `v1.2.0`, the user-catalog requires a `version` property.
> [!NOTE]
> The `user-catalog.json` file can be found in `~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab`.
The list of catalog versions can be found in [packages/backend/src/utils/catalogUtils.ts](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/utils/catalogUtils.ts)
The catalog has its own version number, as it may not need to be updated with every release of the extension. It follows the semantic versioning convention.
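To check which version your current user-catalog declares, here is a minimal sketch, assuming `jq` is installed and the default storage path from the note above:
```sh
# Print the version declared by the local user-catalog, if any
CATALOG=~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json
if [ -f "$CATALOG" ]; then
  jq -r '.version // "none (unversioned, pre-1.2.0 catalog)"' "$CATALOG"
else
  echo "No user-catalog found"
fi
```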
## `None` to Catalog `1.0`
`None` represents any catalog version prior to the first versioning.
Version `1.0` of the catalog adds an important `backend` property to models, defining the type of framework required to run the model (e.g. LLamaCPP, WhisperCPP).
### 🛠️ How to migrate
You can either delete any existing `user-catalog` by removing the `~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json` file.
> [!WARNING]
> This will remove the models you have imported from the catalog. You will be able to import them again afterward.
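If you choose to delete it, a minimal sketch (using the default storage path from the note above) is:
```sh
# Back up the unversioned user-catalog, then remove it
CATALOG=~/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json
cp "$CATALOG" "$CATALOG.bak"  # optional: keep a copy if you want to migrate it manually
rm "$CATALOG"
```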
If you want to keep the data, you can migrate it by updating certain properties within the recipes and models fields.
### Recipes
The recipe object has a new `backend` property, which defines which framework is required.
Accepted values are `llama-cpp`, `whisper-cpp` and `none`.
Moreover, the `models` property has been changed to `recommended`.
> [!TIP]
> Before Podman AI Lab v1.2, recipes used the `models` property to list the compatible models. Now any model using the same `backend` can be used; `recommended` was introduced to highlight certain models.
**Example**
```diff
{
"version": "1.0",
"recipes": [{
"id": "chatbot",
"description" : "This is a Streamlit chat demo application.",
"name" : "ChatBot",
"repository": "https://github.com/containers/ai-lab-recipes",
- "models": [
+ "recommended": [
"hf.instructlab.granite-7b-lab-GGUF",
"hf.instructlab.merlinite-7b-lab-GGUF"
]
+ "backend": "llama-cpp"
}],
"models": [],
"categories": []
}
```
### Models
The model object also has the new `backend` property, which defines which framework is required.
Additionally, we have enhanced security by introducing a new optional `sha256` property.
> [!TIP]
> To get the sha256 of a model, you can use the `sha256sum [model-file]` command in a terminal.
**Example**
```diff
{
"version": "1.0",
"recipes": [],
"models": [{
"id": "hf.instructlab.granite-7b-lab-GGUF",
"name": "instructlab/granite-7b-lab-GGUF",
"description": "# InstructLab Granite 7B",
"hw": "CPU",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf",
"memory": 4080218931,
"properties": {
"chatFormat": "openchat"
},
+ "sha256": "6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487",
+ "backend": "llama-cpp"
}],
"categories": []
}
```
================================================
FILE: PACKAGING-GUIDE.md
================================================
# Packaging guide
## ApplicationCatalog
AI Lab uses an internal catalog embedded within the application. This catalog is loaded
by AI Lab and displayed when you access the catalog page.
The format of the catalog is JSON. It is possible for users to have a custom version of
the catalog. In order to do so, copy the file located at https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json to $HOME/podman-desktop/ai-lab/catalog.json and AI Lab will use it instead of the embedded one.
Any change made to this file is also automatically loaded by AI Lab.
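As a sketch, assuming `curl` is available and that the raw content of `ai.json` is fetched from the main branch, a custom catalog can be seeded from the embedded one:
```sh
# Seed a user-editable catalog from the embedded one
# (URL assumes the raw.githubusercontent mirror of the blob URL mentioned above)
mkdir -p "$HOME/podman-desktop/ai-lab"
curl -sL \
  https://raw.githubusercontent.com/containers/podman-desktop-extension-ai-lab/main/packages/backend/src/assets/ai.json \
  -o "$HOME/podman-desktop/ai-lab/catalog.json"
```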
### Format of the catalog file
The catalog file has three main elements: categories, models and recipes. Each of these elements is
represented in the JSON file as an array.
The catalog is `versioned`. The current version can be found in [ai.json](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json#L2).
> :warning: when the version of the catalog is undefined or differs from the current one, the user-catalog will be ignored.
#### Categories
This is the top level construct of the catalog UI. Recipes are grouped into categories. A category
represents the kind of AI application. Although the list of categories provided by default by
AI Lab represents the AI landscape, it is possible to add new categories.
A category has three main attributes: an id (which should be unique among categories), a description
and a name. The category id attribute will then be used to attach a recipe to one or several categories.
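For illustration only, a hypothetical new category could be appended to a custom `catalog.json` with `jq` (the `audio` id, name and description below are made up):
```sh
# Append a hypothetical category to a custom catalog.json
CATALOG="$HOME/podman-desktop/ai-lab/catalog.json"
jq '.categories += [{"id": "audio", "name": "Audio", "description": "Recipes working with audio models"}]' \
  "$CATALOG" > "$CATALOG.tmp" && mv "$CATALOG.tmp" "$CATALOG"
```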
#### Models
The catalog also lists the models that may be associated with recipes. A model is also a first-class
citizen in AI Lab, as models are listed in the Models page and can be tested through the playground.
A model has the following attributes:
- ```id```: a unique identifier for the model
- ```name```: the model name
- ```description```: a detailed description about the model
- ```registry```: the model registry where the model is stored
- ```popularity```: an integer field giving the rating of the model. Can be thought of as the number of stars
- ```license```: the license under which the model is available
- ```url```: the URL used to download the model
- ```memory```: the memory footprint of the model in bytes, as computed by the workflow `.github/workflows/compute-model-sizes.yml`
- ```sha256```: the SHA-256 checksum used to verify that the downloaded model is identical to the original. It is optional and must be hex encoded (see the example after this list)
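As a sketch, the `sha256` value can be computed locally for a model file you intend to reference (the file name below is hypothetical; `memory` is produced by the workflow mentioned above):
```sh
# Compute the hex-encoded SHA-256 checksum for a catalog model entry
sha256sum granite-7b-lab-Q4_K_M.gguf | awk '{print $1}'
```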
#### Recipes
A recipe is a sample AI application that is packaged as one or several containers. It is built by AI Lab when the user chooses to download and run it on their workstation. It is provided as
source code and AI Lab will make sure the container images are built prior to launching the containers.
A recipe has the following attributes:
- ```id```: a unique identifier to the recipe
- ```name```: the recipe name
- ```description```: a detailed description about the recipe
- ```repository```: the URL where the recipe code can be retrieved
- ```ref```: an optional ref in the repository to check out (a branch name, tag name, or full commit id; short commit ids won't be recognized). If not defined, the default branch will be used
- ```categories```: an array of category ids to be associated with this recipe
- ```basedir```: an optional path within the repository where the ai-lab.yaml file is located. If not provided, the ai-lab.yaml is assumed to be located at the root of the repository
- ```readme```: a markdown description of the recipe
- ```models```: an array of model ids to be associated with this recipe
#### Recipe configuration file
The configuration file is called ```ai-lab.yaml``` and uses the following syntax.
The root elements are called ```version``` and ```application```.
```version``` represents the version of the specifications that ai-lab adheres to (so far, the only accepted value here is `v1.0`).
```application``` contains an attribute called ```containers``` whose syntax is an array of objects containing the following attributes:
- ```name```: the name of the container
- ```contextdir```: the context directory used to build the container.
- ```containerfile```: the containerfile used to build the image
- ```model-service```: a boolean flag used to indicate if the container is running the model or not
- ```arch```: an optional array of architectures this image is compatible with. The values follow the
[GOARCH specification](https://go.dev/src/go/build/syslist.go)
- ```gpu-env```: an optional array of GPU environments this image is compatible with. The only accepted value here is `cuda`.
- ```ports```: an optional array of ports the application listens on.
- `image`: an optional image name to be used when building the container image.
The container that is running the service (having the ```model-service``` flag equal to ```true```) can use at runtime
the model managed by AI Lab through an environment variable ```MODEL_PATH``` whose value is the full path name of the
model file.
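For instance, a hypothetical entrypoint of a model-service container could pass the injected path straight to its server process (the server name and flags below are illustrative, not part of the specification):
```sh
#!/bin/sh
# MODEL_PATH is provided by AI Lab and points to the downloaded model file
exec my-inference-server --model "${MODEL_PATH}" --host 0.0.0.0 --port 8001
```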
Below is an example of such a configuration file:
```yaml
application:
containers:
- name: chatbot-inference-app
contextdir: ai_applications
containerfile: builds/Containerfile
- name: chatbot-model-service
contextdir: model_services
containerfile: base/Containerfile
model-service: true
arch:
- arm64
- amd64
ports:
- 8001
image: quay.io/redhat-et/chatbot-model-service:latest
- name: chatbot-model-servicecuda
contextdir: model_services
containerfile: cuda/Containerfile
model-service: true
gpu-env:
- cuda
arch:
- amd64
ports:
- 8501
image: quay.io/redhat-et/model_services:latest
```
================================================
FILE: README.md
================================================
# Podman AI Lab
Podman AI Lab is an open source extension for Podman Desktop to work with LLMs (Large Language Models) in a local environment. Featuring a recipe catalog with common AI use cases, a curated set of open source models, and a playground for learning, prototyping and experimentation, Podman AI Lab helps you quickly and easily get started bringing AI into your applications, without depending on infrastructure beyond your laptop and while keeping your data private and secure.
## Topics
- [Technology](#technology)
- [Extension features](#extension-features)
- [Requirements](#requirements)
- [Installation](#installation)
- [Usage](#usage)
- [Contributing](#contributing)
- [Feedback](#feedback)
## Technology
Podman AI Lab uses [Podman](https://podman.io) machines to run inference servers for LLM models and AI applications.
AI models can be downloaded, and common formats like [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md), [PyTorch](https://pytorch.org) or [TensorFlow](https://www.tensorflow.org) are supported.
## Extension features
### AI models
Podman AI Lab provides a curated list of open source AI models and LLMs. Once downloaded, the models are available to be used for AI applications, model services and playgrounds.
#### Model services
Once a model is downloaded, a model service can be started. A model service is an inference server that is running in a container and exposing the model through the well-known chat API common to many providers.
#### Playgrounds
The integrated Playground environments allow for experimenting with available models in a local environment. An intuitive user prompt helps in exploring the capabilities and accuracy of various models and aids in finding the best model for the use case at hand. The Playground interface further allows for parameterizing models to further optimize the settings and attributes of each model.
### AI applications
Once an AI model is available through a well-known endpoint, it's easy to imagine a new world of applications that will connect and use the AI model. Podman AI Lab supports AI applications as a set of containers that are connected together.
Podman AI Lab ships with a so-called Recipes Catalog that helps you navigate a number of core AI use cases and problem domains such as Chat Bots, Code Generators and Text Summarizers. Each recipe comes with detailed explanations and sample applications that can be run with various large language models (LLMs). Experimenting with multiple models allows finding the optimal one for your use case.
## Requirements
### Software
- [Podman Desktop 1.8.0+](https://github.com/containers/podman-desktop)
- [Podman 4.9.0+](https://github.com/containers/podman)
- Compatible with Windows, macOS & Linux
### Hardware
LLMs are heavy resource consumers, both in terms of memory and CPU. Each of the provided models consumes about 4 GiB of memory and requires at least 4 CPUs to run.
We recommend a minimum of 12 GB of memory and at least 4 CPUs for the Podman machine. On Windows, the Podman machine shares memory and CPU with all the Windows Subsystem for Linux (WSL) machines. By default, WSL is set to use 50% of total memory and all logical processors. This can be changed in the WSL settings (see [WSL Config](https://learn.microsoft.com/en-us/windows/wsl/wsl-config#wslconfig)).
As an additional recommended practice, do not run more than 3 models simultaneously.
## Installation
You can install the Podman AI Lab extension directly inside Podman Desktop.
Go to Extensions > Catalog > Install Podman AI Lab.

To install a development version, use the `Install custom...` action as shown in the recording below.
The name of the image to use is `ghcr.io/containers/podman-desktop-extension-ai-lab`. You can get released tags for the image at https://github.com/containers/podman-desktop-extension-ai-lab/pkgs/container/podman-desktop-extension-ai-lab.

## Usage
1. **Download a model**
Let's select a model from the catalog and download it locally to our workstation.

2. **Start an inference server**
Once a model is available locally, let's start an inference server

3. **Start a playground to have a chat conversation with model**

4. **Start an AI application and use it from the browser**

## Contributing
Want to help develop and contribute to Podman AI Lab?
You can use `pnpm watch --extension-folder` from the Podman Desktop directory to automatically rebuild and test the AI Lab extension:
> **_Note_**: make sure you have the appropriate [prerequisites](https://github.com/containers/podman-desktop/blob/main/CONTRIBUTING.md#prerequisites-prepare-your-environment)
installed.
```sh
git clone https://github.com/containers/podman-desktop
git clone https://github.com/containers/podman-desktop-extension-ai-lab
cd podman-desktop-extension-ai-lab
corepack enable pnpm
pnpm install
pnpm build
cd ../podman-desktop
pnpm watch --extension-folder ../podman-desktop-extension-ai-lab/packages/backend
```
If you are live-editing the frontend package, run the following from the `packages/frontend` folder:
```sh
pnpm watch
```
### Cleaning up resources
We'll be adding a way to let a user clean up their environment: see issue https://github.com/containers/podman-desktop-extension-ai-lab/issues/469.
For the time being, please consider the following actions (a command sketch follows the list):
1. Remove the extension from Podman Desktop, from Settings > Extensions
2. Remove the running playground environments from the list of Pods
3. Remove the images built by the recipes
4. Remove the containers related to AI
5. Clean up your local clone of the recipes: `$HOME/podman-desktop/ai-lab`
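The sketch below illustrates these steps from the command line; the pod, container and image names are placeholders and will differ on your machine, so review each command before running it:
```sh
# Remove the running playground / recipe pods
podman pod ps
podman pod stop <pod-name> && podman pod rm <pod-name>

# Remove containers and images built by the recipes
podman ps -a
podman rm <container-id>
podman images
podman rmi <image-id>

# Clean up the local clone of the recipes
rm -rf "$HOME/podman-desktop/ai-lab"
```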
### 📖 Providing a custom catalog
The extension provides by default a curated list of recipes, models and categories. However, this system is extensible and you can define your own.
To enhance the existing catalog, you can create a file located in the extension storage folder `$HOME/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/user-catalog.json`.
It must follow the same format as the default catalog [in the sources of the extension](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json).
> :information_source: The default behaviour is to append the items of the user's catalog to the default one.
> :warning: Each item (recipe, model or category) has a unique id; when a conflict between the default catalog and the user's catalog is found, the user's item overrides the default one.
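As a sketch, the snippet below creates a minimal `user-catalog.json` containing a single extra recipe. The field names mirror those visible in the default catalog, but the id, repository and values are made up for illustration; check the full schema against [ai.json](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/packages/backend/src/assets/ai.json) before relying on it:
```sh
# Hypothetical example: add one custom recipe to the user catalog.
CATALOG_DIR="$HOME/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab"
mkdir -p "$CATALOG_DIR"
cat > "$CATALOG_DIR/user-catalog.json" <<'EOF'
{
  "version": "1.0",
  "recipes": [
    {
      "id": "my-custom-chatbot",
      "name": "My Custom ChatBot",
      "description": "A custom recipe added through the user catalog.",
      "repository": "https://github.com/my-org/my-ai-recipes",
      "ref": "main",
      "categories": ["natural-language-processing"],
      "basedir": "recipes/my-custom-chatbot",
      "readme": "",
      "backend": "llama-cpp"
    }
  ],
  "models": [],
  "categories": []
}
EOF
```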
### Packaging sample applications
Sample applications may be added to the catalog. See [packaging guide](https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/PACKAGING-GUIDE.md) for detailed information.
## Roadmap
The roadmap is always open and we are looking for your feedback. Please create new issues and upvote the issues that feel most important to you.
We will be working on the following items:
- **Expanded Recipes**: Discover new use cases and samples to inspire and accelerate your applications.
- **GPU Acceleration**: Speeding up processing times by leveraging GPU acceleration.
- **API/CLI**: Interact with Podman AI Lab from CLI and APIs.
- **Enhanced Playgrounds**: Streamlined workflows and UX giving a better space to experiment with LLMs and quickly iterate.
- **Fine Tuning with [InstructLab](https://instructlab.ai/)**: Re-train LLMs with a taxonomy of knowledge. Learn more about [the InstructLab project](https://github.com/instructlab).
- **Enable Function Calling**: Use LLMs to retrieve or interact with external tools by doing API calls.
- **Local RAG**: Explore the RAG pattern: load your documents and test the behavior of the model.
- **Bridge with AI Platforms (incl. K8s)**: Connect to remote models and ease deployment of applications.
## Feedback
You can provide your feedback on the extension with [this form](https://forms.gle/tctQ4RtZSiMyQr3R8) or create [an issue on this repository](https://github.com/containers/podman-desktop-extension-ai-lab/issues).
================================================
FILE: RELEASE.md
================================================
# Release process for Podman AI Lab
## Pre-requisites
- Create Enhancement Issue `Release vX.X.X` for current sprint, then update the label to `kind/release` and assign it to yourself.
- Confirm with Podman Desktop maintainers that pending / need-to-go-in PRs have been merged.
- Notify main contributors on Discord / Slack.
In the below example, we will pretend that we're upgrading from `1.1.0` to `1.2.0`. Please use the CORRECT release numbers as these are just example numbers.
## Release timeline
Below is what a typical release week may look like:
- **Monday (Notify):** 48-hour notification. Communicate to maintainers and public channels a release will be cut on Wednesday and to merge any pending PRs. Inform QE team. Start work on blog post as it is usually the longest part of the release process.
- **Tuesday (Staging, Testing & Blog):** Stage the release (see instructions below) to create a new cut of the release to test. Test the pre-release (master branch) build briefly. Get feedback from committers (if applicable). Push the blog post for review (as it usually takes a few back-and-forth reviews on documentation).
- **Wednesday (Release):** Publish the new release on the catalog using the below release process.
- **Thursday (Post-release Testing & Blog):** Test the post-release build briefly for any critical bugs. Confirm that new release has been pushed to the catalog. Push the blog post live. Get a known issues list together from QE and publish to the Podman Desktop Discussions, link to this from the release notes.
- **Friday (Communicate):** Friday is statistically the best day for new announcements. Post on internal channels. Post on reddit, hackernews, twitter, etc.
## Releasing on GitHub
1. Go to https://github.com/containers/podman-desktop-extension-ai-lab/actions/workflows/release.yaml
1. Click on the top right drop-down menu `Run workflow`
1. Enter the name of the release. Example: `1.2.0` (DO NOT use the v prefix like v1.2.0)
1. Specify the branch to use for the new release. It's main for all major releases. For a bugfix release, you'll select a different branch.
1. Click on the `Run workflow` button.
1. Note: `Run workflow` takes approximately 2-3 minutes.
1. Close the milestone for the respective release, make sure that all tasks within the milestone are completed / updated before closing. https://github.com/containers/podman-desktop-extension-ai-lab/milestones
1. If not already created, click on `New Milestone` and create a new milestone for the NEXT release.
1. Check that https://github.com/containers/podman-desktop-extension-ai-lab/actions/workflows/release.yaml has been completed.
1. There should be an automated PR that has been created. It will be automatically merged after all tests have run (takes 5-10 minutes). The title looks like `chore: 📢 Bump version to 1.3.0`. Rerun the workflow manually if some of the e2e tests are failing.
1. The above PR MUST be merged before continuing with the steps.
1. Edit the new release https://github.com/containers/podman-desktop-extension-ai-lab/releases/edit/v1.2.0
1. Select the previous tag (v1.1.0), click on `Generate release notes` and then click on `Update release`
## Test the release before it rolls out
The release is published as a pre-release, meaning it is not yet the latest version, so no clients will automatically update to it.
This allows QE (and everyone else) to test the release before it goes live on the catalog.
## Next phase
- ❌ All severe bugs and regressions are investigated and discussed. If we agree any should block the release, fix the bugs and respin the release with a new .z version, such as 1.2.1 instead of 1.2.0.
Create a branch if it does not exist (for example 1.2.x if 1.2.0 failed), then cherry-pick bugfixes into that branch.
- ✅ If committers agree we have a green light, proceed. **Do not forget to change the release from 'pre-release' to 'latest release' before proceeding**.
## Updating catalog
Pre-requisites:
- Ensure the release is OK (green workflow, image has been published https://github.com/containers/podman-desktop-extension-ai-lab/releases https://github.com/containers/podman-desktop-extension-ai-lab/pkgs/container/podman-desktop-extension-ai-lab).
#### Catalog
Create and submit a PR to the catalog (https://github.com/containers/podman-desktop-catalog on branch gh-pages). This is manual and will be automated in the future.
================================================
FILE: SECURITY.md
================================================
## Security and Disclosure Information Policy for the Podman Desktop Extension AI Lab Project
The Podman Desktop Extension AI Lab Project follows the [Security and Disclosure Information Policy](https://github.com/containers/common/blob/main/SECURITY.md) for the Containers Projects.
================================================
FILE: USAGE_DATA.md
================================================
# Data Collection
The AI Lab extension uses telemetry to collect anonymous usage data in order to identify issues and improve our user experience. You can read our privacy statement
[here](https://developers.redhat.com/article/tool-data-collection).
Telemetry for the extension is based on the Podman Desktop telemetry.
Users are prompted during Podman Desktop first startup to accept or decline telemetry. This setting can be
changed at any time in Settings > Preferences > Telemetry.
On disk the setting is stored in the `"telemetry.*"` keys within the settings file,
at `$HOME/.local/share/containers/podman-desktop/configuration/settings.json`. A generated anonymous id
is stored at `$HOME/.redhat/anonymousId`.
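For instance, a quick (hypothetical) way to inspect the stored telemetry preference, assuming `jq` is installed:
```sh
# Print the telemetry-related keys from the Podman Desktop settings file.
jq 'with_entries(select(.key | startswith("telemetry")))' \
  "$HOME/.local/share/containers/podman-desktop/configuration/settings.json"
```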
## What's included in the telemetry data
- General information, including operating system, machine architecture, and country.
- When the extension starts and stops.
- When the icon to enter the extension zone is clicked.
- When a recipe page is opened (with recipe Id and name).
- When a sample application is pulled (with recipe Id and name).
- When a playground is started or stopped (with model Id).
- When a request is sent to a model in the playground (with model Id, **without** request content).
- When a model is downloaded or deleted from disk.
No personally identifiable information is captured. An anonymous id is used so that we can correlate the actions of a user even if we can't tell who they are.
================================================
FILE: api/openapi.yaml
================================================
openapi: 3.0.0
info:
title: Podman Desktop AI Lab API
description: API for interacting with the Podman Desktop AI Lab service.
version: 0.0.1
servers:
- url: http://{host}:{port}
description: Podman Desktop AI Lab API server
variables:
host:
default: 127.0.0.1
port:
default: '10434'
tags:
- name: server
description: Server information
paths:
/api/version:
get:
operationId: getServerVersion
tags:
- server
description: Return the Podman Desktop AI Lab API server version
summary: Return the Podman Desktop AI Lab API server version
responses:
'200':
description: The Podman Desktop AI Lab API server version was successfully fetched
content:
application/json:
schema:
type: object
additionalProperties: false
properties:
version:
type: string
required:
- version
/api/tags:
get:
operationId: getModels
tags:
- models
description: List models that are available locally
summary: List models that are available locally
responses:
'200':
description: The models were successfully fetched
content:
application/json:
schema:
$ref: '#/components/schemas/ListResponse'
/api/pull:
post:
operationId: pullModel
tags:
- models
description: |
Download a model from the Podman AI Lab catalog.
summary: |
Download a model from the Podman AI Lab Catalog.
requestBody:
required: true
description: Request to pull a model
content:
application/json:
schema:
$ref: '#/components/schemas/PullRequest'
responses:
'200':
description: Model was successfully pulled
content:
application/x-ndjson:
schema:
$ref: '#/components/schemas/ProgressResponse'
/api/show:
post:
operationId: showModel
tags:
- models
description: |
Not implemented, returns an empty object - Show information about a model including details, modelfile, template,
parameters, license, and system prompt.
summary: |
Show information about a model including details, modelfile, template,
parameters, license, and system prompt.
requestBody:
required: true
description: Request to show a model
content:
application/json:
schema:
$ref: '#/components/schemas/ShowRequest'
responses:
'200':
description: The model's information was successfully fetched
content:
application/json:
schema:
$ref: '#/components/schemas/ShowResponse'
/api/generate:
post:
operationId: generateResponse
tags:
- generate
description: |
Generate a response for a given prompt with a provided model. This is
a streaming endpoint, so there will be a series of responses. The
final response object will include statistics and additional data from
the request.
summary: |
Generate a response for a given prompt with a provided model. This is
a streaming endpoint, so there will be a series of responses. The final
response object will include statistics and additional data from the
request.
requestBody:
required: true
description: Request to generate a response
content:
application/json:
schema:
$ref: '#/components/schemas/GenerateRequest'
responses:
'200':
description: A response was successfully generated for the prompt
content:
application/json:
schema:
$ref: '#/components/schemas/GenerateResponse'
/api/chat:
post:
operationId: generateChat
tags:
- chat
- generate
description: |
Generate the next message in a chat with a provided model. This is a
streaming endpoint, so there will be a series of responses. Streaming
can be disabled using "stream": false. The final response object will
include statistics and additional data from the request.
summary: |
Generate the next message in a chat with a provided model. This is a
streaming endpoint, so there will be a series of responses. Streaming
can be disabled using "stream": false. The final response object will
include statistics and additional data from the request.
requestBody:
required: true
description: Request to generate a response in a chat
content:
application/json:
schema:
$ref: '#/components/schemas/ChatRequest'
responses:
'200':
description: The next message was successfully generated for the chat
content:
application/json:
schema:
$ref: '#/components/schemas/ChatResponse'
/api/ps:
get:
operationId: getRunningModels
tags:
- models
description: List running models
summary: List running models
responses:
'200':
description: The list of running models was successfully fetched
content:
application/json:
schema:
$ref: '#/components/schemas/ProcessResponse'
components:
schemas:
ListResponse:
type: object
description: Response from a list request
properties:
models:
type: array
items:
$ref: '#/components/schemas/ListModelResponse'
ListModelResponse:
type: object
description: Response from a list request
properties:
name:
type: string
model:
type: string
modified_at:
type: string
format: date-time
size:
type: integer
digest:
type: string
details:
$ref: '#/components/schemas/ModelDetails'
ProcessResponse:
type: object
description: Response with a list of running models
properties:
models:
type: array
items:
$ref: '#/components/schemas/ProcessModelResponse'
ProcessModelResponse:
type: object
description: Running model description
properties:
name:
type: string
model:
type: string
size:
type: integer
digest:
type: string
details:
$ref: '#/components/schemas/ModelDetails'
expires_at:
type: string
format: date-time
size_vram:
type: integer
ModelDetails:
type: object
description: Details about a model
properties:
parent_model:
type: string
format:
type: string
family:
type: string
families:
type: array
items:
type: string
parameter_size:
type: string
quantization_level:
type: string
PullRequest:
type: object
description: Request to pull a model
properties:
model:
type: string
description: The name of the model to pull
example: instructlab/granite-7b-lab-GGUF
insecure:
type: boolean
description: |
allow insecure connections to the catalog.
stream:
type: boolean
description: |
If false the response will be returned as a single response object,
rather than a stream of objects
required:
- model
ProgressResponse:
type: object
description: The response returned from various streaming endpoints
properties:
status:
type: string
description: The status of the request
digest:
type: string
description: The SHA256 digest of the blob
total:
type: integer
description: The total size of the task
completed:
type: integer
description: The completed size of the task
ShowRequest:
type: object
description: Request to show a model
properties:
model:
type: string
description: The name of the model to show
required:
- model
ShowResponse:
type: object
description: Response from a show request
properties:
license:
type: string
description: The model license
modelfile:
type: string
description: The modelfile content
parameters:
type: string
description: The model parameters
template:
type: string
description: The model template
system:
type: string
description: The model system message/prompt
details:
$ref: '#/components/schemas/ModelDetails'
messages:
type: array
items:
$ref: '#/components/schemas/Message'
GenerateRequest:
type: object
description: Request to generate a response
properties:
model:
type: string
description: The model name
prompt:
type: string
description: The prompt to generate a response for
suffix:
type: string
images:
type: array
items:
type: string
format: byte
description: |
A list of base64-encoded images (for multimodal models such as
llava)
format:
type: string
description: |
The format to return a response in. Currently the only accepted
value is json
system:
type: string
description: |
System message (overrides what is defined in the Modelfile)
template:
type: string
description: |
The prompt template to use (overrides what is defined in the
Modelfile)
context:
type: array
items:
type: integer
description: |
The context parameter returned from a previous request to generate,
this can be used to keep a short conversational memory
example: []
stream:
type: boolean
description: |
If false the response will be returned as a single response object,
rather than a stream of objects
raw:
type: boolean
description: |
If true no formatting will be applied to the prompt. You may choose
to use the raw parameter if you are specifying a full templated
prompt in your request to the API
keep_alive:
$ref: '#/components/schemas/Duration'
required:
- model
GenerateResponse:
type: object
description: Response from a generate request
properties:
model:
type: string
description: The model name that generated the response
created_at:
type: string
format: date-time
description: Timestamp of the response
response:
type: string
description: |
The textual response itself. When done, this is empty if the response was
streamed; if not streamed, it contains the full response
done:
type: boolean
description: Specifies if the response is complete
context:
type: array
items:
type: integer
description: |
When done, encoding of the conversation used in this response
total_duration:
type: number
description: When done, time spent generating the response
load_duration:
type: number
description: When done, time spent in nanoseconds loading the model
prompt_eval_count:
type: integer
description: When done, number of tokens in the prompt
prompt_eval_duration:
type: number
description: |
When done, time spent in nanoseconds evaluating the prompt
eval_count:
type: integer
description: When done, number of tokens in the response
eval_duration:
type: number
description: |
When done, time in nanoseconds spent generating the response
ChatRequest:
type: object
description: Request to generate a response in a chat
properties:
model:
type: string
description: The model name
messages:
type: array
items:
$ref: '#/components/schemas/Message'
description: Messages of the chat - can be used to keep a chat memory
stream:
type: boolean
description: Enable streaming of returned response
format:
type: string
description: Format to return the response in (e.g. "json")
keep_alive:
$ref: '#/components/schemas/Duration'
options:
$ref: '#/components/schemas/Options'
ChatResponse:
type: object
description: Response from a chat request
properties:
model:
type: string
description: The model name
created_at:
type: string
format: date-time
description: Timestamp of the response
message:
$ref: '#/components/schemas/Message'
done_reason:
type: string
description: Reason the model stopped generating text
done:
type: boolean
description: Specifies if the response is complete
total_duration:
type: number
description: Total duration of the request
load_duration:
type: number
description: Load duration of the request
prompt_eval_count:
type: integer
description: Count of prompt evaluations
prompt_eval_duration:
type: number
description: Duration of prompt evaluations
eval_count:
type: integer
description: Count of evaluations
eval_duration:
type: number
description: Duration of evaluations
Message:
type: object
description: A message in a chat
properties:
role:
type: string
content:
type: string
images:
type: array
items:
type: string
format: byte
Duration:
type: string
description: A string representing the duration
example: "5m"
Options:
type: object
description: |
Advanced model and runner options for generation and chat requests
properties:
num_keep:
type: integer
description: |
Specifies the number of tokens from the beginning of
the context to retain when the context limit is reached.
(Default: 4)
example: 4
seed:
type: integer
description: |
Sets the random number seed to use for generation. Setting this to
a specific number will make the model generate the same text for
the same prompt.
(Default: 0)
example: -1
num_predict:
type: integer
description: |
Maximum number of tokens to predict when generating text.
(Default: 128, -1 = infinite generation, -2 = fill context)
example: -1
top_k:
type: integer
description: |
Reduces the probability of generating nonsense. A higher value
(e.g. 100) will give more diverse answers, while a lower value
(e.g. 10) will be more conservative.
(Default: 40)
example: 40
top_p:
type: number
format: float
description: |
Works together with top-k. A higher value (e.g., 0.95) will lead to
more diverse text, while a lower value (e.g., 0.5) will generate
more focused and conservative text.
(Default: 0.9)
example: 0.9
tfs_z:
type: number
format: float
description: |
Tail free sampling is used to reduce the impact of less probable
tokens from the output. A higher value (e.g., 2.0) will reduce the
impact more, while a value of 1.0 disables this setting.
(default: 1)
example: 1.0
typical_p:
type: number
format: float
description: |
Controls the selection of typical words based on their probability
distribution. A higher value (e.g., 0.95) focuses on more typical
words, reducing the chance of unusual words being selected.
(Default: 1.0)
example: 1.0
repeat_last_n:
type: integer
description: |
Sets how far back the model looks to prevent repetition.
(Default: 64, 0 = disabled, -1 = num_ctx)
example: 64
temperature:
type: number
format: float
description: |
The temperature of the model. Increasing the temperature will make
the model answer more creatively.
(Default: 0.8)
example: 0.8
repeat_penalty:
type: number
format: float
description: |
Sets how strongly to penalize repetitions. A higher value
(e.g., 1.5) will penalize repetitions more strongly, while a lower
value (e.g., 0.9) will be more lenient.
(Default: 1.1)
example: 1.1
presence_penalty:
type: number
format: float
description: |
Applies a penalty to tokens that have already appeared in the
generated text, encouraging the model to introduce new tokens. A
higher value increases this penalty, promoting more varied and less
repetitive output.
(Default: 0.8)
example: 0.8
frequency_penalty:
type: number
format: float
description: |
Penalizes tokens based on their frequency in the generated text so
far. A higher value reduces the likelihood of frequent tokens being
generated again, promoting more diverse outputs.
(Default: 0.8)
example: 0.8
mirostat:
type: number
format: float
description: |
Enable Mirostat sampling for controlling perplexity.
(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
example: 0
mirostat_tau:
type: number
format: float
description: |
Controls the balance between coherence and diversity of the output.
A lower value will result in more focused and coherent text.
(Default: 5.0)
example: 5.8
mirostat_eta:
type: number
format: float
description: |
Influences how quickly the algorithm responds to feedback from the
generated text. A lower learning rate will result in slower
adjustments, while a higher learning rate will make the algorithm
more responsive.
(Default: 0.1)
example: 0.1
penalize_newline:
type: boolean
description: |
Determines whether the model should penalize the generation of
newlines, which can help control the structure and formatting of
the output.
(Default: true)
example: true
stop:
type: array
items:
type: string
description: |
Sets the stop sequences to use. When this pattern is encountered
the LLM will stop generating text and return. Multiple stop patterns
may be set by specifying multiple separate stop parameters in a
modelfile.
example: ['AI assistant.']
numa:
type: boolean
description: |
Indicates whether to use Non-Uniform Memory Access (NUMA) for
optimizing memory usage and performance on multi-processor systems.
(Default: false)
example: false
num_ctx:
type: integer
description: |
Sets the size of the context window used to generate the next token.
(Default: 2048)
example: 2048
num_batch:
type: integer
description: |
Specifies the number of batches for processing.
(Default: 512)
example: 512
num_gpu:
type: integer
description: |
Specifies the number of GPUs to use. A value of -1 uses all
available GPUs.
(Default: -1)
example: -1
main_gpu:
type: integer
description: |
Specifies the primary GPU to use for processing.
(Default: 0)
low_vram:
type: boolean
description: |
Indicates whether to optimize the model for low VRAM usage.
(Default: false)
example: false
f16_kv:
type: boolean
description: |
Indicates whether to use 16-bit floating point precision for
key-value pairs, reducing memory usage.
(Default: false)
example: true
logits_all:
type: boolean
description: |
Specifies whether to output logits for all tokens.
(Default: false)
example: false
vocab_only:
type: boolean
description: |
Indicates whether to only load the vocabulary without the full model.
(Default: false)
example: false
use_mmap:
type: boolean
description: |
Determines whether to use memory-mapped files for loading the model,
improving performance on large models.
(Default: true)
example: true
use_mlock:
type: boolean
description: |
Determines whether to use memory locking to prevent swapping the
model out of RAM.
(Default: false)
example: false
num_thread:
type: integer
description: |
Specifies the number of threads to use for processing. A value of
0 uses all available threads.
(Default: 0)
example: 0
================================================
FILE: clean.sh
================================================
rm -rf node_modules packages/backend/node_modules packages/frontend/node_modules
================================================
FILE: commitlint.config.js
================================================
module.exports = { extends: ['@commitlint/config-conventional'] };
================================================
FILE: docs/proposals/ai-studio.md
================================================
# Motivation
Today, there is no notion of ordering between the containers, but we know that we have a dependency between
the client application and the container that is running the model.
The second issue is that there is no concept of a starting point for a container: today we rely only on the
container being started by the container engine, and we know that this is not adequate for the model service container.
So this is handled by a kind of dirty fix: the containers are all started in parallel, but the client application
will fail because the model service is not yet ready (as it takes a while to start), so we keep restarting the client application
until the model service is properly started.
The purpose of this change is to propose an update to the ai-lab.yaml so that it is as generic as possible
and inspired by the Compose specification.
## Proposed changes
Define a condition for the container to be properly started: this would be based on the `readinessProbe` that can already
be defined in a Kubernetes container. In the first iteration, we would support only the `exec` field. If
`readinessProbe` is defined, then we would check for the healthcheck status field to be `healthy`.
So the current chatbot file would be updated from:
```yaml
application:
  type: language
  name: chatbot
  description: This is a LLM chatbot application that can interact with a llamacpp model-service
  containers:
    - name: chatbot-inference-app
      contextdir: ai_applications
      containerfile: builds/Containerfile
    - name: chatbot-model-service
      contextdir: model_services
      containerfile: base/Containerfile
      model-service: true
      backend:
        - llama
      arch:
        - arm64
        - amd64
    - name: chatbot-model-servicecuda
      contextdir: model_services
      containerfile: cuda/Containerfile
      model-service: true
      backend:
        - llama
      gpu-env:
        - cuda
      arch:
        - amd64
```
to
```yaml
application:
  type: language
  name: chatbot
  description: This is a LLM chatbot application that can interact with a llamacpp model-service
  containers:
    - name: chatbot-inference-app
      contextdir: ai_applications
      containerfile: builds/Containerfile
      readinessProbe: # added
        exec: # added
          command: # added
            - curl -f localhost:8080 || exit 1 # added
    - name: chatbot-model-service
      contextdir: model_services
      containerfile: base/Containerfile
      model-service: true
      readinessProbe: # added
        exec: # added
          command: # added
            - curl -f localhost:7860 || exit 1 # added
      backend:
        - llama
      arch:
        - arm64
        - amd64
    - name: chatbot-model-servicecuda
      contextdir: model_services
      containerfile: cuda/Containerfile
      model-service: true
      readinessProbe: # added
        exec: # added
          command: # added
            - curl -f localhost:7860 || exit 1 # added
      backend:
        - llama
      gpu-env:
        - cuda
      arch:
        - amd64
```
From the Podman Desktop API point of view, this would require extending the
[ContainerCreateOptions](https://podman-desktop.io/api/interfaces/ContainerCreateOptions) structure to support the
HealthCheck option.
================================================
FILE: docs/proposals/state-management.md
================================================
# State management
The backend manages and persists the State. The backend pushes new state to the front-end
when changes happen, and the front-end can ask for the current value of the state.
The front-end uses `readable` stores to expose the state to the different pages. The store
listens for new states pushed by the backend (`onMessage`), and asks for the current state
at initial time.
The pages of the front-end subscribe to the store to get the value of the state in a reactive manner.
## Catalog
The catalog is persisted as a file in the user's filesystem. The backend reads the file at startup,
and watches the file for changes. The backend updates the state as soon as it detects changes.
The front-end uses a `readable` store, which waits for changes on the Catalog state
(using `onMessage('new-catalog-state', data)`),
and asks for the current state at startup (with `postMessage('ask-catalog-state')`).
The interested pages of the front-end subscribe to the store to get the value
of the Catalog state in a reactive manner.
## Pulled applications
The front-end initiates the pulling of an application (using `postMessage('pull-application', app-id)`).
The backend manages and persists the state of the pulled applications and pushes every update
on the state (progression, etc.) (using `postMessage('new-pulled-application-state', app-id, data)`).
The front-end uses a `readable` store, which waits for changes on the Pulled Applications state
(using `onMessage('new-pulled-application-state')`), and asks for the current state at startup
(with `postMessage('ask-pulled-applications-state')`).
The interested pages of the front-end subscribe to the store to get the value of the Pulled Applications state
in a reactive manner.
## Errors
The front-end initiates operations (pull application, etc). When an error happens during an operation,
the backend manages and persists the error in a centralized way.
The backend pushes new errors (using `postMessage('new-error-state', data)`).
Optionally, it can push errors to the core Podman Desktop, to display errors in the notifications system.
The front-end uses a `readable` store, which waits for changes on the Errors state (using `onMessage('new-error-state')`),
and asks for the current state at startup (using `postMessage('ask-error-state')`).
The interested pages of the front-end subscribe to the store to display the errors related to the page.
The user can acknowledge an error (using a `postMessage('ack-error', id)`).
================================================
FILE: eslint.config.mjs
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import globals from 'globals';
import js from '@eslint/js';
import typescriptLint from 'typescript-eslint';
import tsParser from '@typescript-eslint/parser';
import svelteParser from 'svelte-eslint-parser';
import importPlugin from 'eslint-plugin-import';
import { fixupConfigRules, fixupPluginRules } from '@eslint/compat';
import { fileURLToPath } from 'node:url';
import path from 'node:path';
import { FlatCompat } from '@eslint/eslintrc';
import unicorn from 'eslint-plugin-unicorn';
import noNull from 'eslint-plugin-no-null';
import sonarjs from 'eslint-plugin-sonarjs';
import etc from 'eslint-plugin-etc';
import svelte from 'eslint-plugin-svelte';
import redundantUndefined from 'eslint-plugin-redundant-undefined';
import simpleImportSort from 'eslint-plugin-simple-import-sort';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const compat = new FlatCompat({
baseDirectory: __dirname,
recommendedConfig: js.configs.recommended,
allConfig: js.configs.all,
});
const TYPESCRIPT_PROJECTS = ['packages/*/tsconfig.json', 'tests/*/tsconfig.json'];
export default [
{
ignores: [
'*.config.*js',
'**/*.config.*js',
'**/dist/**/*',
'**/test-resources',
'**/__mocks__/',
'**/coverage/',
'packages/backend/media/**',
'**/.svelte-kit/',
'scripts/**',
'**/src-generated/',
'tests/playwright/tests/playwright/output/**',
],
},
js.configs.recommended,
...typescriptLint.configs.recommended,
sonarjs.configs.recommended,
...svelte.configs['flat/recommended'],
...fixupConfigRules(
compat.extends('plugin:import/recommended', 'plugin:import/typescript', 'plugin:etc/recommended'),
),
{
plugins: {
// compliant v9 plug-ins
unicorn,
// non-compliant v9 plug-ins
etc: fixupPluginRules(etc),
import: fixupPluginRules(importPlugin),
'no-null': fixupPluginRules(noNull),
'redundant-undefined': fixupPluginRules(redundantUndefined),
'simple-import-sort': fixupPluginRules(simpleImportSort),
},
settings: {
'import/resolver': {
typescript: true,
node: true,
'eslint-import-resolver-custom-alias': {
alias: {
'/@': './src',
'/@gen': './src-generated',
},
extensions: ['.ts'],
packages: ['packages/*'],
},
},
},
},
{
linterOptions: {
reportUnusedDisableDirectives: 'off',
},
languageOptions: {
globals: {
...globals.node,
},
// parser: tsParser,
sourceType: 'module',
parserOptions: {
extraFileExtensions: ['.svelte'],
warnOnUnsupportedTypeScriptVersion: false,
project: TYPESCRIPT_PROJECTS,
},
},
},
{
rules: {
eqeqeq: 'error',
'prefer-promise-reject-errors': 'error',
semi: ['error', 'always'],
'comma-dangle': ['warn', 'always-multiline'],
quotes: [
'error',
'single',
{
allowTemplateLiterals: true,
},
],
'@typescript-eslint/explicit-function-return-type': 'off',
'@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_', caughtErrors: 'none' }],
'@typescript-eslint/no-var-requires': 'off',
'@typescript-eslint/consistent-type-imports': 'error',
'@typescript-eslint/no-explicit-any': 'error',
'@typescript-eslint/await-thenable': 'error',
'@typescript-eslint/no-floating-promises': ['error', { ignoreVoid: false }],
'@typescript-eslint/no-misused-promises': 'error',
'@typescript-eslint/prefer-optional-chain': 'error',
'@typescript-eslint/explicit-function-return-type': 'error',
'@typescript-eslint/prefer-nullish-coalescing': [
'error',
{
ignoreConditionalTests: true,
},
],
'@typescript-eslint/no-require-imports': 'off',
// unicorn custom rules
'unicorn/prefer-node-protocol': 'error',
'no-null/no-null': 'error',
'sonarjs/no-empty-function': 'off',
'sonarjs/deprecation': 'off',
'sonarjs/todo-tag': 'off',
'sonarjs/sonar-no-fallthrough': 'off',
/**
* Having a semicolon helps the optimizer interpret your code correctly.
* This avoids rare errors in optimized code.
* @see https://twitter.com/alex_kozack/status/1364210394328408066
*/
semi: ['error', 'always'],
/**
* This will make the history of changes in the hit a little cleaner
*/
'comma-dangle': ['warn', 'always-multiline'],
/**
* Just for beauty
*/
quotes: ['error', 'single', { allowTemplateLiterals: true }],
// disabled import/namespace rule as the plug-in is not fully compatible using the compat mode
'import/namespace': 'off',
'import/no-duplicates': 'error',
'import/first': 'error',
'import/newline-after-import': 'error',
'import/no-extraneous-dependencies': 'error',
'import/no-unresolved': 'off',
'import/default': 'off',
'import/no-named-as-default-member': 'off',
'import/no-named-as-default': 'off',
'sonarjs/cognitive-complexity': 'off',
'sonarjs/no-duplicate-string': 'off',
'sonarjs/no-empty-collection': 'off',
'sonarjs/no-small-switch': 'off',
'sonarjs/no-unused-expressions': 'off',
'etc/no-commented-out-code': 'error',
'etc/no-deprecated': 'off',
'etc/no-commented-out-code': 'off',
'redundant-undefined/redundant-undefined': 'error',
'import/no-extraneous-dependencies': 'error',
'import/no-restricted-paths': [
'error',
{
zones: [
{
target: './packages/backend/**/*',
from: ['./packages/frontend/**/*'],
},
{
target: './packages/frontend/**/*',
from: ['./packages/backend/**/*'],
},
],
},
],
// disabled as code in this project is not yet compliant:
'svelte/valid-compile': 'off',
'no-undef': 'off',
},
},
{
files: ['**/*.svelte'],
languageOptions: {
parser: svelteParser,
ecmaVersion: 5,
sourceType: 'script',
parserOptions: {
parser: tsParser,
},
},
rules: {
eqeqeq: 'off',
'etc/no-implicit-any-catch': 'off',
'no-inner-declarations': 'off',
'sonarjs/code-eval': 'off',
'sonarjs/different-types-comparison': 'off',
'sonarjs/prefer-nullish-coalescing': 'off',
'sonarjs/no-nested-template-literals': 'off',
'sonarjs/no-nested-conditional': 'off',
'@typescript-eslint/no-unused-vars': 'off',
'@typescript-eslint/ban-types': 'off',
'@typescript-eslint/no-unused-expressions': 'off',
},
},
{
files: ['packages/frontend/**'],
languageOptions: {
globals: {
...Object.fromEntries(Object.entries(globals.node).map(([key]) => [key, 'off'])),
...globals.browser,
},
},
},
{
files: ['packages/shared/**'],
languageOptions: {
globals: {
...Object.fromEntries(Object.entries(globals.node).map(([key]) => [key, 'off'])),
...Object.fromEntries(Object.entries(globals.browser).map(([key]) => [key, 'off'])),
},
},
},
];
================================================
FILE: package.json
================================================
{
"name": "ai-lab-monorepo",
"displayName": "ai-lab-monorepo",
"description": "ai-lab-monorepo",
"publisher": "redhat",
"version": "1.10.0-next",
"license": "Apache-2.0",
"private": true,
"engines": {
"node": ">=24.0.0",
"npm": ">=10.2.3"
},
"scripts": {
"build": "concurrently \"cd packages/frontend && pnpm run build\" \"cd packages/backend && pnpm run build\"",
"watch": "concurrently \"cd packages/frontend && pnpm run watch\" \"cd packages/backend && pnpm run watch\"",
"format:check": "prettier --check \"**/src/**/*.{ts,svelte}\"",
"format:fix": "prettier --write \"**/src/**/*.{ts,svelte}\"",
"lint:check": "eslint . --cache",
"lint:fix": "eslint . --cache --fix",
"svelte:check": "svelte-check",
"test:backend": "vitest run -r packages/backend --passWithNoTests --coverage",
"test:frontend": "vitest -c packages/frontend/vite.config.js run packages/frontend --passWithNoTests --coverage",
"test:shared": "vitest run -r packages/shared --passWithNoTests --coverage",
"test:unit": "pnpm run test:backend && pnpm run test:shared && pnpm run test:frontend",
"test:e2e": "cd tests/playwright && pnpm run test:e2e",
"test:e2e:smoke": "cd tests/playwright && pnpm run test:e2e:smoke",
"test:e2e:instructlab": "cd tests/playwright && pnpm run test:e2e:instructlab",
"typecheck:shared": "tsc --noEmit --project packages/shared",
"typecheck:frontend": "tsc --noEmit --project packages/frontend",
"typecheck:backend": "cd packages/backend && pnpm run typecheck",
"typecheck": "pnpm run typecheck:shared && pnpm run typecheck:frontend && pnpm run typecheck:backend",
"prepare": "husky"
},
"resolutions": {
"string-width": "^4.2.0",
"wrap-ansi": "^7.0.0",
"postman-code-generators": "1.10.1"
},
"lint-staged": {
"*.{js,ts,tsx,svelte}": [
"eslint --cache --fix",
"prettier --cache --write"
],
"*.{md,css,json}": "prettier --write"
},
"devDependencies": {
"@commitlint/cli": "^20.5.2",
"@commitlint/config-conventional": "^20.5.0",
"@eslint/compat": "^2.0.5",
"@typescript-eslint/eslint-plugin": "^8.59.1",
"@typescript-eslint/parser": "^8.59.1",
"@vitest/coverage-v8": "^3.2.3",
"autoprefixer": "^10.5.0",
"commitlint": "^20.5.2",
"concurrently": "^9.2.1",
"eslint": "^9.39.2",
"eslint-import-resolver-custom-alias": "^1.3.2",
"eslint-import-resolver-typescript": "^4.3.5",
"eslint-plugin-etc": "^2.0.3",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-no-null": "^1.0.2",
"eslint-plugin-redundant-undefined": "^1.0.0",
"eslint-plugin-simple-import-sort": "^13.0.0",
"eslint-plugin-sonarjs": "^4.0.3",
"eslint-plugin-svelte": "^3.17.1",
"eslint-plugin-unicorn": "^64.0.0",
"globals": "^17.5.0",
"husky": "^9.1.7",
"lint-staged": "^16.4.0",
"msw": "^2.14.2",
"prettier": "^3.8.3",
"prettier-plugin-svelte": "^3.5.1",
"svelte-check": "^4.4.6",
"svelte-eslint-parser": "^1.6.0",
"typescript": "5.9.3",
"typescript-eslint": "^8.59.1",
"vite": "^7.3.1",
"vitest": "^3.0.5"
},
"workspaces": {
"packages": [
"packages/*",
"tests/*"
]
},
"dependencies": {
"js-yaml": "^4.1.1",
"zod": "^4.3.6"
},
"scarfSettings": {
"enabled": false
},
"pnpm": {
"overrides": {
"postman-collection>semver": "^7.5.2"
},
"ignoredBuiltDependencies": [
"@scarf/scarf",
"@tailwindcss/oxide",
"esbuild",
"postman-code-generators",
"svelte-preprocess",
"unrs-resolver"
]
},
"packageManager": "pnpm@10.12.4+sha512.5ea8b0deed94ed68691c9bad4c955492705c5eeb8a87ef86bc62c74a26b037b08ff9570f108b2e4dbd1dd1a9186fea925e527f141c648e85af45631074680184"
}
================================================
FILE: packages/backend/.gitignore
================================================
media
/src-generated
================================================
FILE: packages/backend/__mocks__/@podman-desktop/api.js
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
/**
* Mock the extension API for vitest.
* This file is referenced from vitest.config.js file.
*/
const plugin = {};
module.exports = plugin;
================================================
FILE: packages/backend/package.json
================================================
{
"name": "ai-lab",
"displayName": "Podman AI Lab",
"description": "Podman AI Lab lets you work with LLMs locally, exploring AI fundamentals, experimenting with models and prompts, and serving models while maintaining data security and privacy.",
"version": "1.10.0-next",
"icon": "icon.png",
"type": "module",
"publisher": "redhat",
"license": "Apache-2.0",
"engines": {
"podman-desktop": ">=1.8.0"
},
"main": "./dist/extension.cjs",
"contributes": {
"commands": [
{
"command": "ai-lab.navigation.inference.start",
"title": "AI Lab: navigate to inference start page",
"hidden": true
},
{
"command": "ai-lab.navigation.recipe.start",
"title": "AI Lab: navigate to recipe start page",
"hidden": true
}
],
"configuration": {
"title": "AI Lab",
"properties": {
"ai-lab.models.path": {
"type": "string",
"format": "folder",
"default": "",
"description": "Custom path where to download models. Note: The extension must be restarted for changes to take effect. (Default is blank)"
},
"ai-lab.modelUploadDisabled": {
"type": "boolean",
"default": false,
"description": "Disable the model upload to the podman machine",
"hidden": true
},
"ai-lab.experimentalGPU": {
"type": "boolean",
"default": false,
"description": "Experimental GPU support for inference servers"
},
"ai-lab.apiPort": {
"type": "number",
"default": 10434,
"minimum": 1024,
"maximum": 65535,
"description": "Port on which the API is listening (requires restart of extension)"
},
"ai-lab.inferenceRuntime": {
"type": "string",
"enum": [
"all",
"llama-cpp",
"whisper-cpp",
"none"
],
"description": "Choose the default inferencing runtime for AI Lab"
},
"ai-lab.experimentalTuning": {
"type": "boolean",
"default": false,
"description": "Display InstructLab Tuning screens (experimental)",
"hidden": true
},
"ai-lab.showGPUPromotion": {
"type": "boolean",
"default": true,
"description": "Display GPU promotion banner",
"hidden": true
}
}
},
"icons": {
"brain-icon": {
"description": "Brain icon",
"default": {
"fontPath": "brain.woff2",
"fontCharacter": "\\E001"
}
}
},
"views": {
"icons/containersList": [
{
"when": "ai-lab-model-id in containerLabelKeys",
"icon": "${brain-icon}"
}
],
"icons/image": [
{
"when": "ai-lab-recipe-id in imageLabelKeys",
"icon": "${brain-icon}"
}
]
}
},
"scripts": {
"generate": "npx openapi-typescript ../../api/openapi.yaml -o src-generated/openapi.ts",
"build": "pnpm run generate && vite build",
"test": "vitest run --coverage",
"test:watch": "vitest watch --coverage",
"format:check": "prettier --check \"src/**/*.ts\"",
"format:fix": "prettier --write \"src/**/*.ts\"",
"watch": "pnpm run generate && npx vite --mode development build -w",
"typecheck": "pnpm run generate && tsc --noEmit"
},
"dependencies": {
"@ai-sdk/mcp": "^1.0.36",
"@ai-sdk/openai-compatible": "^2.0.42",
"@huggingface/gguf": "^0.4.2",
"@huggingface/hub": "^2.11.0",
"ai": "^6.0.168",
"express": "^5.2.1",
"express-openapi-validator": "^5.6.2",
"isomorphic-git": "^1.37.6",
"js-yaml": "^4.1.1",
"mustache": "^4.2.0",
"openai": "^6.35.0",
"postman-code-generators": "^1.14.1",
"postman-collection": "^5.3.0",
"semver": "^7.7.4",
"swagger-ui-dist": "^5.32.5",
"swagger-ui-express": "^5.0.1",
"systeminformation": "^5.31.5",
"xml-js": "^1.6.11"
},
"devDependencies": {
"@podman-desktop/api": "1.13.0-202409181313-78725a6565",
"@ai-sdk/provider": "^3.0.8",
"@ai-sdk/provider-utils": "^4.0.24",
"@rollup/plugin-replace": "^6.0.3",
"@types/express": "^5.0.6",
"@types/js-yaml": "^4.0.9",
"@types/mustache": "^4.2.6",
"@types/node": "^24",
"@types/postman-collection": "^3.5.11",
"@types/supertest": "^7.2.0",
"@types/swagger-ui-dist": "^3.30.5",
"@types/swagger-ui-express": "^4.1.8",
"openapi-typescript": "^7.13.0",
"supertest": "^7.2.2",
"vitest": "^3.0.5"
}
}
================================================
FILE: packages/backend/src/assets/ai.json
================================================
{
"version": "1.0",
"recipes": [
{
"id": "chatbot",
"description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications using Streamlit.",
"name": "ChatBot",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/chatbot",
"readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot\npodman pod rm chatbot\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). 
This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain"]
},
{
"id": "chatbot-pydantic-ai",
"description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications with the pydantic framework using Streamlit",
"name": "Chatbot PydanticAI",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/chatbot-pydantic-ai",
"readme": "# Chatbot Pydantic Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot Pydantic AI` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot-pydantic-ai.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot-pydantic-ai.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot-pydantic-ai`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. 
\nPlease refer to the section below for more details about [interacting with the chatbot-pydantic-ai application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot-pydantic-ai\npodman pod rm chatbot-pydantic-ai\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot-pydantic-ai\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-pydantic-ai from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-pydantic-ai from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot-pydantic-ai applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot-pydantic-ai workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot-pydantic-ai application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-pydantic-ai-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot-pydantic-ai service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot-pydantic-ai\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "PydanticAI"]
},
{
"id": "agents",
"description": "This recipe shows how ReAct can be used to create an intelligent music discovery assistant with Spotify API.",
"name": "ReAct Agent Application",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/agents",
"readme": "# ReAct Agent Application\n\n This recipe demonstrates the ReAct (Reasoning and Acting) framework in action through a music exploration application. ReAct enables AI to think step-by-step about tasks, take appropriate actions, and provide reasoned responses. The application shows how ReAct can be used to create an intelligent music discovery assistant that combines reasoning with Spotify API interactions.\nThe application utilizes [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) for the Model Service and integrates with Spotify's API for music data. The recipe uses [Langchain](https://python.langchain.com/docs/get_started/introduction) for the ReAct implementation and [Streamlit](https://streamlit.io/) for the UI layer.\n\n## Spotify API Access\nTo use this application, you'll need Spotify API credentials (follow the link here for documentation https://developer.spotify.com/documentation/web-api):\n- Create a Spotify Developer account\n- Create an application in the Spotify Developer Dashboard (https://developer.spotify.com/documentation/web-api/concepts/apps dont worry about adding web/redirect url use the defaults)\n- Get your Client ID and Client Secret once the app is created (https://developer.spotify.com/dashboard)\n\nThese can be provided through environment variables or the application's UI.\n\n## Try the ReAct Agent Application\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `ReAct Agent` and follow the instructions to start the application.\n\n# Build the Application\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the Pod above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n## Download a model\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well performant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). 
There are a number of ways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from [huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\nThe recommended model can be downloaded using the code snippet below:\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/agents\n```\n_A full list of supported open models is forthcoming._ \n\n## Build the Model Service\nThe complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\nThe AI Application can be built from the make command:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/agents from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/agents from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled applications.",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain"]
},
{
"id": "summarizer",
"description": "This recipe guides into creating custom LLM-powered summarization applications using Streamlit.",
"name": "Summarizer",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/summarizer",
"readme": "# Text Summarizer Application\n\n This recipe helps developers start building their own custom LLM enabled summarizer applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the summarizer application below.\n\n \n\n\n## Try the Summarizer Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Summarizer` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/summarizer.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/summarizer.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `summarizer`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the summarizer application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop summarizer\npodman pod rm summarizer\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). 
This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/summarizer\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/summarizer from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/summarizer from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the summarizer application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled summarizer applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample summarizer workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the summarizer application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/summarizer-bootc:latest\n```\n\nUpon a reboot, you'll see that the summarizer service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status summarizer\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain"]
},
{
"id": "codegeneration",
"description": "This recipes showcases how to leverage LLM to build your own custom code generation application.",
"name": "Code Generation",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "generator",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/codegen",
"readme": "# Code Generation Application\n\n This recipe helps developers start building their own custom LLM enabled code generation applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the code generation application below.\n\n \n\n\n## Try the Code Generation Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Code Generation` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/codegen.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/codegen.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `codegen`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. \nPlease refer to the section below for more details about [interacting with the codegen application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop codegen\npodman pod rm codgen\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-8b-code-instruct](https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k). 
This is a well\nperformant mid-sized model with an apache-2.0 license fine-tuned for code generation. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-8b-code-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k-GGUF.\n\nThere are a number of options for quantization level, but we recommend `Q4_K_M`. \n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-8b-code-instruct-4k-GGUF/resolve/main/granite-8b-code-instruct.Q4_K_M.gguf\ncd ../recipes/natural_language_processing/codegen\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheck out the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where it's served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/codegen from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/codegen from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up and running with the code generation application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled code generation applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample code generation workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. 
You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the code generation application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/codegen-bootc:latest\n```\n\nUpon a reboot, you'll see that the codegen service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status codegen\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF",
"hf.ibm-granite.granite-8b-code-instruct"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain"]
},
{
"id": "rag",
"description": "This application illustrates how to integrate RAG (Retrieval Augmented Generation) into LLM applications enabling to interact with your own documents.",
"name": "RAG Chatbot",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/rag",
"readme": "# RAG (Retrieval Augmented Generation) Chat Application\n\nThis demo provides a simple recipe to help developers start to build out their own custom RAG (Retrieval Augmented Generation) applications. It consists of three main components; the Model Service, the Vector Database and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\nIn order for the LLM to interact with our documents, we need them stored and available in such a manner that we can retrieve a small subset of them that are relevant to our query. To do this we employ a Vector Database alongside an embedding model. The embedding model converts our documents into numerical representations, vectors, such that similarity searches can be easily performed. The Vector Database stores these vectors for us and makes them available to the LLM. In this recipe we can use [chromaDB](https://docs.trychroma.com/) or [Milvus](https://milvus.io/) as our Vector Database.\n\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with our Model Service and we use [Streamlit](https://streamlit.io/) for our UI layer. Below please see an example of the RAG application. \n\n\n\n\n## Try the RAG chat application\n\n_COMING SOON to AI LAB_\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `RAG Chatbot` and follow the instructions to start the application.\n\nIf you prefer building and running the application from terminal, please run the following commands from this directory.\n\nFirst, build application's meta data and run the generated Kubernetes YAML which will spin up a Pod along with a number of containers:\n```\nmake quadlet\npodman kube play build/rag.yaml\n```\n\nThe Pod is named `rag`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\n\nTo stop and remove the Pod, run:\n```\npodman pod stop rag\npodman pod rm rag\n```\n\nOnce the Pod is running, please refer to the section below to [interact with the RAG chatbot application](#interact-with-the-ai-application).\n\n# Build the Application\n\nIn order to build this application we will need two models, a Vector Database, a Model Service and an AI Application. \n\n* [Download models](#download-models)\n* [Deploy the Vector Database](#deploy-the-vector-database)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n### Download models\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF). 
This is a well\nperformant mid-sized model with an apache-2.0 license that has been quanitzed and served into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/rag\n```\n\n_A full list of supported open models is forthcoming._ \n\nIn addition to the LLM, RAG applications also require an embedding model to convert documents between natural language and vector representations. For this demo we will use [`BAAI/bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) it is a fairly standard model for this use case and has an MIT license. \n\nThe code snippet below can be used to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model and store it in your `models/` directory. \n\n```python \nfrom huggingface_hub import snapshot_download\nsnapshot_download(repo_id=\"BAAI/bge-base-en-v1.5\",\n cache_dir=\"models/\",\n local_files_only=False)\n```\n\n### Deploy the Vector Database \n\nTo deploy the Vector Database service locally, simply use the existing ChromaDB or Milvus image. The Vector Database is ephemeral and will need to be re-populated each time the container restarts. When implementing RAG in production, you will want a long running and backed up Vector Database.\n\n\n#### ChromaDB\n```bash\npodman pull chromadb/chroma\n```\n```bash\npodman run --rm -it -p 8000:8000 chroma\n```\n#### Milvus\n```bash\npodman pull milvusdb/milvus:master-20240426-bed6363f\n```\n```bash\npodman run -it \\\n --name milvus-standalone \\\n --security-opt seccomp:unconfined \\\n -e ETCD_USE_EMBED=true \\\n -e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \\\n -e COMMON_STORAGETYPE=local \\\n -v $(pwd)/volumes/milvus:/var/lib/milvus \\\n -v $(pwd)/embedEtcd.yaml:/milvus/configs/embedEtcd.yaml \\\n -p 19530:19530 \\\n -p 9091:9091 \\\n -p 2379:2379 \\\n --health-cmd=\"curl -f http://localhost:9091/healthz\" \\\n --health-interval=30s \\\n --health-start-period=90s \\\n --health-timeout=20s \\\n --health-retries=3 \\\n milvusdb/milvus:master-20240426-bed6363f \\\n milvus run standalone 1> /dev/null\n```\nNote: For running the Milvus instance, make sure you have the `$(pwd)/volumes/milvus` directory and `$(pwd)/embedEtcd.yaml` file as shown in this repository. These are required by the database for its operations.\n\n\n### Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built with the following code snippet:\n\n```bash\ncd model_servers/llamacpp_python\npodman build -t llamacppserver -f ./base/Containerfile .\n```\n\n\n### Deploy the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
You can start your local Model Service using the following Podman command:\n```\npodman run --rm -it \\\n -p 8001:8001 \\\n -v Local/path/to/locallm/models:/locallm/models \\\n -e MODEL_PATH=models/ \\\n -e HOST=0.0.0.0 \\\n -e PORT=8001 \\\n llamacppserver\n```\n\n### Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application image in the `rag-langchain/` directory.\n\n```bash\ncd rag\nmake APP_IMAGE=rag build\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.\n\nThere also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database. \n\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 \\\n-e MODEL_ENDPOINT=http://10.88.0.1:8001 \\\n-e VECTORDB_HOST=10.88.0.1 \\\n-v Local/path/to/locallm/models/:/rag/models \\\nrag \n```\n\n### Interact with the AI Application\n\nEverything should now be up an running with the rag application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled RAG applications. \n\n### Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample RAG chatbot workload as a service that starts when a system is booted, cd into this folder\nand run:\n\n\n```\nmake BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc\n```\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 BOOTC_IMAGE=quay.io/your/rag-bootc:latest bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the RAG chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```\nbootc switch quay.io/your/rag-bootc:latest\n```\n\nUpon a reboot, you'll see that the RAG chatbot service is running on the system.\n\nCheck on the service with\n\n```\nssh user@bootc-system-ip\nsudo systemctl status rag\n```\n\n#### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. 
This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n##### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n\n### Makefile variables\n\nThere are several [Makefile variables](../../common/README.md) defined within each `recipe` Makefile which can be\nused to override defaults for a variety of make targets.\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain", "vectordb"]
},
{
"id": "rag-nodejs",
"description": "This application illustrates how to integrate RAG (Retrieval Augmented Generation) into LLM applications written in Node.js enabling to interact with your own documents.",
"name": "Node.js RAG Chatbot",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/rag-nodejs",
"readme": "# RAG (Retrieval Augmented Generation) Chat Application\n\nThis demo provides a simple recipe to help Node.js developers start to build out their own custom RAG (Retrieval Augmented Generation) applications. It consists of three main components; the Model Service, the Vector Database and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\nIn order for the LLM to interact with our documents, we need them stored and available in such a manner that we can retrieve a small subset of them that are relevant to our query. To do this we employ a Vector Database alongside an embedding model. The embedding model converts our documents into numerical representations, vectors, such that similarity searches can be easily performed. The Vector Database stores these vectors for us and makes them available to the LLM. In this recipe we can use [chromaDB](https://docs.trychroma.com/) as our Vector Database.\n\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://js.langchain.com/docs/introduction/) package to simplify communication with our Model Service and we use [React Chatbotify](https://react-chatbotify.com/) and [Next.js](https://nextjs.org/) for our UI layer. Below please see an example of the RAG application. \n\n\n\n\n## Try the RAG chat application\n\n_COMING SOON to AI LAB_\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `RAG Node.js Chatbot` and follow the instructions to start the application.\n\nIf you prefer building and running the application from terminal, please run the following commands from this directory.\n\nFirst, build application's meta data and run the generated Kubernetes YAML which will spin up a Pod along with a number of containers:\n```\nmake quadlet\npodman kube play build/rag-nodesjs.yaml\n```\n\nThe Pod is named `rag_nodejs`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\n\nTo stop and remove the Pod, run:\n```\npodman pod stop rag_nodejs\npodman pod rm rag_nodejs\n```\n\nOnce the Pod is running, please refer to the section below to [interact with the RAG chatbot application](#interact-with-the-ai-application).\n\n# Build the Application\n\nIn order to build this application we will need two models, a Vector Database, a Model Service and an AI Application. \n\n* [Download models](#download-models)\n* [Deploy the Vector Database](#deploy-the-vector-database)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n### Download models\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF). 
This is a well\nperformant mid-sized model with an apache-2.0 license that has been quanitzed and served into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/rag_nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n### Deploy the Vector Database \n\nTo deploy the Vector Database service locally, simply use the existing ChromaDB. The Vector Database is ephemeral and will need to be re-populated each time the container restarts. When implementing RAG in production, you will want a long running and backed up Vector Database.\n\n\n#### ChromaDB\n```bash\npodman pull chromadb/chroma\n```\n```bash\npodman run --rm -it -p 8000:8000 chroma\n```\n\n### Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built with the following code snippet:\n\n```bash\ncd model_servers/llamacpp_python\npodman build -t llamacppserver -f ./base/Containerfile .\n```\n\n\n### Deploy the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [the llamacpp_python model-service document](../model_servers/llamacpp_python/README.md).\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. You can start your local Model Service using the following Podman command:\n```\npodman run --rm -it \\\n -p 8001:8001 \\\n -v Local/path/to/locallm/models:/locallm/models \\\n -e MODEL_PATH=models/ \\\n -e HOST=0.0.0.0 \\\n -e PORT=8001 \\\n llamacppserver\n```\n\n### Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application image in the `rag-nodejs/` directory.\n\n```bash\ncd rag-nodejs\nmake APP_IMAGE=rag-nodejs build\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.\n\nThere also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database. \n\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 \\\n-e MODEL_ENDPOINT=http://10.88.0.1:8001 \\\n-e VECTORDB_HOST=10.88.0.1 \\\n-v Local/path/to/locallm/models/:/rag/models \\\nrag-nodejs \n```\n\n### Interact with the AI Application\n\nEverything should now be up an running with the rag application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled RAG applications. \n\n### Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample RAG chatbot workload as a service that starts when a system is booted, cd into this folder\nand run:\n\n\n```\nmake BOOTC_IMAGE=quay.io/your/rag-nodejs-bootc:latest bootc\n```\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 BOOTC_IMAGE=quay.io/your/rag-nodejs-bootc:latest bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the RAG Node.js chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```\nbootc switch quay.io/your/rag-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the RAG Node.js chatbot service is running on the system.\n\nCheck on the service with:\n\n```\nssh user@bootc-system-ip\nsudo systemctl status rag-nodejs\n```\n\n#### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n##### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n\n### Makefile variables\n\nThere are several [Makefile variables](../../common/README.md) defined within each `recipe` Makefile which can be\nused to override defaults for a variety of make targets.\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["javascript"],
"frameworks": ["react", "langchain", "vectordb"]
},
{
"id": "chatbot-java-quarkus",
"description": "This is a Java Quarkus-based recipe demonstrating how to create an AI-powered chat applications.",
"name": "Java-based ChatBot (Quarkus)",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/chatbot-java-quarkus",
"readme": "# Java-based chatbot application\n\nThis application implements a simple chatbot backed by Quarkus and its\nLangChain4j extension. The UI communicates with the backend application via\nweb sockets and the backend uses the OpenAI API to talk to the model served\nby Podman AI Lab.\n\nDocumentation for Quarkus+LangChain4j can be found at\nhttps://docs.quarkiverse.io/quarkus-langchain4j/dev/.",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["java"],
"frameworks": ["quarkus", "langchain4j"]
},
{
"id": "chatbot-javascript-react",
"description": "This is a NodeJS based recipe demonstrating how to create an AI-powered chat applications.",
"name": "Node.js based ChatBot",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/chatbot-nodejs",
"readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications using Node.js and JavaScript. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's]( https://js.langchain.com/docs/introduction) JavaScript package to simplify communication with the Model Service and uses [react-chatbotify](https://react-chatbotify.com/) for the UI layer. You can find an example of the chat application below.\n\n \n\n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Node.js based Chatbot` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot-nodejs.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot-nodejs.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `nodejs chat app`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. 
\nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot-nodejs\npodman pod rm chatbot-nodejs\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot-nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-nodejs from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot-nodejs from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. \n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot-nodejs\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"hf.ibm-research.granite-3.2-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["javascript"],
"frameworks": ["react", "langchain"]
},
{
"id": "function-calling",
"description": "This recipes guides into multiple function calling use cases, showing the ability to structure data and chain multiple tasks, using Streamlit.",
"name": "Function calling",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/function_calling",
"readme": "# Function Calling Application\n\n This recipe helps developers start building their own custom function calling enabled chat applications. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the chat application below.\n\n \n\n\n## Try the Function Calling Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Function Calling` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/chatbot.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `chatbot`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. 
\nPlease refer to the section below for more details about [interacting with the chatbot application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop chatbot\npodman pod rm chatbot\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/chatbot\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled chatbot applications. 
\n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/chatbot-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status chatbot\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "langchain"]
},
{
"id": "function-calling-nodejs",
"description": "This recipes guides into multiple function calling use cases, showing the ability to structure data and chain multiple tasks, using Streamlit.",
"name": "Node.js Function calling",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/function-calling-nodejs",
"readme": "# Function Calling Application\n\n This recipe helps developers start building their own AI applications with function calling capabilities. It consists of two main components: the Model Service and the AI Application.\n\n There are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\n\n The AI Application will connect to the Model Service via its OpenAI compatible API. The recipe relies on [Langchain's](https://js.langchain.com/v0.2/docs/introduction/) Typescript package to simplify communication with the Model Service and [langgraph.js](https://langchain-ai.github.io/langgraphjs/) to enable the LLM to call functions. It uses [fastify](https://fastify.dev/) as the backend-server and chart.js to plot the weather data returned. You can find an example of the chat application below.\n\n\n\n\n## Try the Function Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `function-calling-nodejs` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will\ngo into greater detail on how each container in the Pod above is built, run, and \nwhat purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n\nThis application requires a model, a model service and an AI inferencing application.\n\n* [Quickstart](#quickstart)\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n* [Embed the AI Application in a Bootable Container Image](#embed-the-ai-application-in-a-bootable-container-image)\n\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command\nbuilds the application's metadata and generates Kubernetes YAML at `./build/chatbot.yaml` to spin up a Pod that can then be launched locally.\nTry it with:\n\n```\nmake quadlet\npodman kube play build/function-calling-nodejs.yaml\n```\n\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `function-calling-nodejs`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n\n```\npodman pod list\npodman ps\n```\n\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. 
\nPlease refer to the section below for more details about [interacting with the function calling application](#interact-with-the-ai-application).\n\nTo stop and remove the Pod, run:\n\n```\npodman pod stop function-calling-nodejs\npodman pod rm function-calling-nodejs\n```\n\n## Download a model\n\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well\nperformant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted\nand quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of\nways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from\n[huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/function-calling-nodejs\n```\n\n_A full list of supported open models is forthcoming._ \n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the\n[llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\n\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nThe AI Application can be built from the make command:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\n\nEverything should now be up an running with the function calling application available at [`http://localhost:8501`](http://localhost:8501). 
By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled function calling applications.\n\n## Embed the AI Application in a Bootable Container Image\n\nTo build a bootable container image that includes this sample chatbot workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\n\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\n\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n\n```\nmake ARCH=x86_64 bootc\n```\n\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the chatbot application, it's as simple as ssh-ing into the bootc system and running:\n\n```bash\nbootc switch quay.io/ai-lab/function-calling-nodejs-bootc:latest\n```\n\nUpon a reboot, you'll see that the chatbot service is running on the system. Check on the service with:\n\n```bash\nssh user@bootc-system-ip\nsudo systemctl status function-calling-nodejs\n```\n\n### What are bootable containers?\n\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\n\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than\nat _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system.\nBootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization\ntools. Might I suggest [podman](https://podman.io/)?\n\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI\nimage registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think\nfactories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\n\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\n\nYou can convert a bootc image to a bootable disk image using the\n[quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\n\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\n\nDefault image types can be set via the DISK_TYPE Makefile variable.\n\n`make bootc-image-builder DISK_TYPE=ami`\n",
"recommended": [
"hf.ibm-granite.granite-4.0-micro-GGUF",
"hf.ibm-granite.granite-4.0-tiny-GGUF",
"hf.ibm-granite.granite-3.3-8b-instruct-GGUF"
],
"backend": "llama-cpp",
"languages": ["javascript"],
"frameworks": ["langchain.js", "langgraph", "fastify"]
},
{
"id": "graph-rag",
"description": "This demo provides a recipe to build out a custom Graph RAG (Graph Retrieval Augmented Generation) application using the repo LightRag which abstracts Microsoft's GraphRag implementation. It consists of two main components; the Model Service, and the AI Application with a built in Database.",
"name": "Graph RAG Chat Application",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/graph-rag",
"readme": "# Graph RAG (Retrieval Augmented Generation) Chat Application\nThis demo provides a recipe to build out a custom Graph RAG (Graph Retrieval Augmented Generation) application using the repo LightRag which abstracts Microsoft's GraphRag implementation. It consists of two main components; the Model Service, and the AI Application with a built in Database.\nThere are a few options today for local Model Serving, but this recipe will use [`llama-cpp-python`](https://github.com/abetlen/llama-cpp-python) and their OpenAI compatible Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/llamacpp_python/base/Containerfile`](/model_servers/llamacpp_python/base/Containerfile).\nLightRag simplifies development by handling the Vectordb setup automatically, while also offering experienced developers the flexibility to choose from various Vectordb options based on their preferences for usability and scalability.\nOur AI Application will connect to our Model Service via it's OpenAI compatible API. In this example we rely on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with our Model Service and we use [Streamlit](https://streamlit.io/) for our UI layer. Below please see an example of the RAG application. \n\n## Try the RAG chat application\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Graph Rag` and follow the instructions to start the application.\n\n## Models that work with this Recipe\nNot all models work with this Recipe try out mistral or llama models! \n\n# Build the Application\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the Pod above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n## Quickstart\nTo run the application with pre-built images from `quay.io/ai-lab`, use `make quadlet`. This command builds the application's metadata and generates Kubernetes YAML at `./build/graph-rag.yaml` to spin up a Pod that can then be launched locally. Try it with:\n```\nmake quadlet\npodman kube play build/graph-rag.yaml\n```\nThis will take a few minutes if the model and model-server container images need to be downloaded. \nThe Pod is named `graph-rag`, so you may use [Podman](https://podman.io) to manage the Pod and its containers:\n```\npodman pod list\npodman ps\n```\nOnce the Pod and its containers are running, the application can be accessed at `http://localhost:8501`. However, if you started the app via the podman desktop UI, a random port will be assigned instead of `8501`. Please use the AI App Details `Open AI App` button to access it instead. 
Please refer to the section below for more details about [interacting with the Graph Rag application](#interact-with-the-ai-application).\nTo stop and remove the Pod, run:\n```\npodman pod stop graph-rag\npodman pod rm graph-rag\n```\n\n## Download a model\nIf you are just getting started, we recommend using [granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct). This is a well performant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of ways to get a GGUF version of granite-3.3-8b-instruct, but the simplest is to download a pre-converted one from [huggingface.co](https://huggingface.co) here: https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF.\nThe recommended model can be downloaded using the code snippet below:\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf\ncd ../recipes/natural_language_processing/graph-rag\n```\n_A full list of supported open models is forthcoming._ \n\n## Build the Model Service\nThe complete instructions for building and deploying the Model Service can be found in the [llamacpp_python model-service document](../../../model_servers/llamacpp_python/README.md).\nThe Model Service can be built from make commands from the [llamacpp_python directory](../../../model_servers/llamacpp_python/).\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/llamacpp_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\nThe local Model Service relies on a volume mount to the localhost to access the model files. It also employs environment variables to dictate the model used and where its served. You can start your local Model Service using the following `make` command from `model_servers/llamacpp_python` set with reasonable defaults:\n```bash\n# from path model_servers/llamacpp_python from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\nThe AI Application can be built from the make command:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/graph-rag from repo containers/ai-lab-recipes)\nmake build\n```\n\n## Deploy the AI Application\nMake sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To deploy the AI application use the following:\n```bash\n# Run this from the current directory (path recipes/natural_language_processing/graph-rag from repo containers/ai-lab-recipes)\nmake run \n```\n\n## Interact with the AI Application\nEverything should now be up an running with the chat application available at [`http://localhost:8501`](http://localhost:8501). By using this recipe and getting this starting point established, users should now have an easier time customizing and building their own LLM enabled graph-rag applications. 
\n\n## Embed the AI Application in a Bootable Container Image\nTo build a bootable container image that includes this sample graph-rag workload as a service that starts when a system is booted, run: `make -f Makefile bootc`. You can optionally override the default image / tag you want to give the make command by specifying it as follows: `make -f Makefile BOOTC_IMAGE= bootc`.\nSubstituting the bootc/Containerfile FROM command is simple using the Makefile FROM option.\n```bash\nmake FROM=registry.redhat.io/rhel9/rhel-bootc:9.4 bootc\n```\nSelecting the ARCH for the bootc/Containerfile is simple using the Makefile ARCH= variable.\n```\nmake ARCH=x86_64 bootc\n```\nThe magic happens when you have a bootc enabled system running. If you do, and you'd like to update the operating system to the OS you just built\nwith the graph-rag application, it's as simple as ssh-ing into the bootc system and running:\n```bash\nbootc switch quay.io/ai-lab/graph-rag-bootc:latest\n```\nUpon a reboot, you'll see that the graph-rag service is running on the system. Check on the service with:\n```bash\nssh user@bootc-system-ip\nsudo systemctl status graph-rag\n```\n\n### What are bootable containers?\nWhat's a [bootable OCI container](https://containers.github.io/bootc/) and what's it got to do with AI?\nThat's a good question! We think it's a good idea to embed AI workloads (or any workload!) into bootable images at _build time_ rather than at _runtime_. This extends the benefits, such as portability and predictability, that containerizing applications provides to the operating system. Bootable OCI images bake exactly what you need to run your workloads into the operating system at build time by using your favorite containerization tools. Might I suggest [podman](https://podman.io/)?\nOnce installed, a bootc enabled system can be updated by providing an updated bootable OCI image from any OCI image registry with a single `bootc` command. This works especially well for fleets of devices that have fixed workloads - think factories or appliances. Who doesn't want to add a little AI to their appliance, am I right?\nBootable images lend toward immutable operating systems, and the more immutable an operating system is, the less that can go wrong at runtime!\n\n#### Creating bootable disk images\nYou can convert a bootc image to a bootable disk image using the [quay.io/centos-bootc/bootc-image-builder](https://github.com/osbuild/bootc-image-builder) container image.\nThis container image allows you to build and deploy [multiple disk image types](../../common/README_bootc_image_builder.md) from bootc container images.\nDefault image types can be set via the DISK_TYPE Makefile variable.\n`make bootc-image-builder DISK_TYPE=ami`",
"recommended": [],
"backend": "llama-cpp",
"languages": ["python"],
"frameworks": ["streamlit", "lightrag"]
},
{
"id": "audio_to_text",
"description": "This application demonstrate how to use LLM for transcripting an audio into text.",
"name": "Audio to Text",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "generator",
"categories": ["audio"],
"basedir": "recipes/audio/audio_to_text",
"readme": "# Audio to Text Application\n\nThis recipe helps developers start building their own custom AI enabled audio transcription applications. It consists of two main components: the Model Service and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use [`whisper-cpp`](https://github.com/ggerganov/whisper.cpp.git) and its included Model Service. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/whispercpp/base/Containerfile`](/model_servers/whispercpp/base/Containerfile).\n\nThe AI Application will connect to the Model Service via an API. The recipe relies on [Langchain's](https://python.langchain.com/docs/get_started/introduction) python package to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the audio to text application below.\n\n\n \n\n## Try the Audio to Text Application:\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Audio to Text` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the application above is built, run, and what purpose it serves in the overall application. All the recipes use a central [Makefile](../../common/Makefile.common) that includes variables populated with default values to simplify getting started. Please review the [Makefile docs](../../common/README.md), to learn about further customizing your application.\n\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n * [Input audio files](#input-audio-files)\n\n## Download a model\n\nIf you are just getting started, we recommend using [ggerganov/whisper.cpp](https://huggingface.co/ggerganov/whisper.cpp).\nThis is a well performant model with an MIT license.\nIt's simple to download a pre-converted whisper model from [huggingface.co](https://huggingface.co)\nhere: https://huggingface.co/ggerganov/whisper.cpp. There are a number of options, but we recommend to start with `ggml-small.bin`.\n\nThe recommended model can be downloaded using the code snippet below:\n\n```bash\ncd ../../../models\ncurl -sLO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin\ncd ../recipes/audio/audio_to_text\n```\n\n_A full list of supported open models is forthcoming._\n\n\n## Build the Model Service\n\nThe complete instructions for building and deploying the Model Service can be found in the [whispercpp model-service document](../../../model_servers/whispercpp/README.md).\n\n```bash\n# from path model_servers/whispercpp from repo containers/ai-lab-recipes\nmake build\n```\nCheckout the [Makefile](../../../model_servers/whispercpp/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
It also employs environment variables to dictate the model used and where it's served. You can start your local Model Service using the following `make` command from `model_servers/whispercpp` set with reasonable defaults:\n\n```bash\n# from path model_servers/whispercpp from repo containers/ai-lab-recipes\nmake run\n```\n\n## Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application\nimage from the [`audio-to-text/`](./) directory.\n\n```bash\n# from path recipes/audio/audio_to_text from repo containers/ai-lab-recipes\npodman build -t audio-to-text app\n```\n### Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image.\nWhen starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`.\nThis could be any appropriately hosted Model Service (running locally or in the cloud) using a compatible API.\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001/inference audio-to-text \n```\n\n### Interact with the AI Application\n\nOnce the streamlit application is up and running, you should be able to access it at `http://localhost:8501`.\nFrom here, you can upload audio files from your local machine and translate the audio files as shown below.\n\nBy using this recipe and getting this starting point established,\nusers should now have an easier time customizing and building their own AI enabled applications.\n\n#### Input audio files\n\nWhisper.cpp requires 16-bit WAV audio files as input.\nTo convert your input audio files to 16-bit WAV format you can use `ffmpeg` like this:\n\n```bash\nffmpeg -i <input-audio-file> -ar 16000 -ac 1 -c:a pcm_s16le <output-file>.wav\n```\n",
"recommended": ["hf.ggerganov.whisper.cpp"],
"backend": "whisper-cpp",
"languages": ["python"],
"frameworks": ["streamlit"]
},
{
"id": "object_detection",
"description": "This recipe illustrates how to use LLM to interact with images and build object detection applications.",
"name": "Object Detection",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "generator",
"categories": ["computer-vision"],
"basedir": "recipes/computer_vision/object_detection",
"readme": "# Object Detection\n\nThis recipe helps developers start building their own custom AI enabled object detection applications. It consists of two main components: the Model Service and the AI Application.\n\nThere are a few options today for local Model Serving, but this recipe will use our FastAPI [`object_detection_python`](../../../model_servers/object_detection_python/src/object_detection_server.py) model server. There is a Containerfile provided that can be used to build this Model Service within the repo, [`model_servers/object_detection_python/base/Containerfile`](/model_servers/object_detection_python/base/Containerfile).\n\nThe AI Application will connect to the Model Service via an API. The recipe relies on [Streamlit](https://streamlit.io/) for the UI layer. You can find an example of the object detection application below.\n\n \n\n## Try the Object Detection Application:\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Object Detection` and follow the instructions to start the application.\n\n# Build the Application\n\nThe rest of this document will explain how to build and run the application from the terminal, and will go into greater detail on how each container in the application above is built, run, and what purpose it serves in the overall application. All the Model Server elements of the recipe use a central Model Server [Makefile](../../../model_servers/common/Makefile.common) that includes variables populated with default values to simplify getting started. Currently we do not have a Makefile for the Application elements of the Recipe, but this coming soon, and will leverage the recipes common [Makefile](../../common/Makefile.common) to provide variable configuration and reasonable defaults to this Recipe's application.\n\n* [Download a model](#download-a-model)\n* [Build the Model Service](#build-the-model-service)\n* [Deploy the Model Service](#deploy-the-model-service)\n* [Build the AI Application](#build-the-ai-application)\n* [Deploy the AI Application](#deploy-the-ai-application)\n* [Interact with the AI Application](#interact-with-the-ai-application)\n\n## Download a model\n\nIf you are just getting started, we recommend using [facebook/detr-resnet-101](https://huggingface.co/facebook/detr-resnet-101).\nThis is a well performant model with an Apache-2.0 license.\nIt's simple to download a copy of the model from [huggingface.co](https://huggingface.co)\n\nYou can use the `download-model-facebook-detr-resnet-101` make target in the `model_servers/object_detection_python` directory to download and move the model into the models directory for you:\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\n make download-model-facebook-detr-resnet-101\n```\n\n## Build the Model Service\n\nThe You can build the Model Service from the [object_detection_python model-service directory](../../../model_servers/object_detection_python).\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake build\n```\n\nCheckout the [Makefile](../../../model_servers/object_detection_python/Makefile) to get more details on different options for how to build.\n\n## Deploy the Model Service\n\nThe local Model Service relies on a volume mount to the localhost to access the model files. 
It also employs environment variables to dictate the model used and where it's served. You can start your local Model Service using the following `make` command from the [`model_servers/object_detection_python`](../../../model_servers/object_detection_python) directory, which will be set with reasonable defaults:\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake run\n```\n\nAs stated above, by default the model service will use [`facebook/detr-resnet-101`](https://huggingface.co/facebook/detr-resnet-101). However, you can use other compatible models. Simply pass the new `MODEL_NAME` and `MODEL_PATH` to the make command. Make sure the model is downloaded and exists in the [models directory](../../../models/):\n\n```bash\n# from path model_servers/object_detection_python from repo containers/ai-lab-recipes\nmake MODEL_NAME=facebook/detr-resnet-50 MODEL_PATH=/models/facebook/detr-resnet-50 run\n```\n\n## Build the AI Application\n\nNow that the Model Service is running we want to build and deploy our AI Application. Use the provided Containerfile to build the AI Application\nimage from the [`object_detection/`](./) recipe directory.\n\n```bash\n# from path recipes/computer_vision/object_detection from repo containers/ai-lab-recipes\npodman build -t object_detection_client .\n```\n\n### Deploy the AI Application\n\nMake sure the Model Service is up and running before starting this container image.\nWhen starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`.\nThis could be any appropriately hosted Model Service (running locally or in the cloud) using a compatible API.\nThe following Podman command can be used to run your AI Application:\n\n```bash\npodman run -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8000/detection object_detection_client\n```\n\n### Interact with the AI Application\n\nOnce the client is up and running, you should be able to access it at `http://localhost:8501`. From here you can upload images from your local machine and detect objects in the image as shown below. \n\nBy using this recipe and getting this starting point established,\nusers should now have an easier time customizing and building their own AI enabled applications.\n",
"recommended": ["hf.facebook.detr-resnet-101"],
"backend": "none",
"languages": ["python"],
"frameworks": ["streamlit"]
},
{
"id": "chatbot-llama-stack",
"description": "This recipe provides a blueprint for developers to create their own AI-powered chat applications using Streamlit and llama-stack.",
"name": "ChatBot using Llama Stack",
"repository": "https://github.com/containers/ai-lab-recipes",
"ref": "v1.8.0",
"icon": "natural-language-processing",
"categories": ["natural-language-processing"],
"basedir": "recipes/natural_language_processing/chatbot-llama-stack",
"readme": "# Chat Application\n\n This recipe helps developers start building their own custom LLM enabled chat applications.\n\n There are a few options today for local Model Serving, but this recipe will use [`Llama Stack`](https://llama-stack.readthedocs.io/en/latest/).\n\n The AI Application will connect to the Model Service via its API. The recipe relies on [Llama Stack Client Python SDK](https://github.com/meta-llama/llama-stack-client-python) to simplify communication with the Model Service and uses [Streamlit](https://streamlit.io/) for the UI layer. \n\n## Try the Chat Application\n\nThe [Podman Desktop](https://podman-desktop.io) [AI Lab Extension](https://github.com/containers/podman-desktop-extension-ai-lab) includes this recipe among others. To try it out, open `Recipes Catalog` -> `Chatbot using Llama Stack` and follow the instructions to start the application.\n",
"backend": "llama-stack",
"languages": ["python"],
"frameworks": ["streamlit", "llama-stack"]
}
],
"models": [
{
"id": "hf.mistralai.mistral-small-3.2-24b-instruct-2506",
"name": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
"description": "Mistral-Small-3.2-24B-Instruct-2506 is a minor update of [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\n\r\nSmall-3.2 improves in the following categories:\r\n- **Instruction following**: Small-3.2 is better at following precise instructions\r\n- **Repetition errors**: Small-3.2 produces less infinite generations or repetitive answers\r\n- **Function calling**: Small-3.2's function calling template is more robust (see [here](https://github.com/mistralai/mistral-common/blob/535b4d0a0fc94674ea17db6cf8dc2079b81cbcfa/src/mistral_common/tokens/tokenizers/instruct.py#L778) and [examples](#function-calling))\r\n\r\nIn all other categories Small-3.2 should match or slightly improve compared to [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\n\r\n## Key Features\r\n- same as [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503#key-features)\r\n\r\n## Benchmark Results\r\nWe compare Mistral-Small-3.2-24B to [Mistral-Small-3.1-24B-Instruct-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).\r\nFor more comparison against other models of similar size, please check [Mistral-Small-3.1's Benchmarks'](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503#benchmark-results)\r\n\r\n### Text \r\n#### Instruction Following / Chat / Tone\r\n| Model | Wildbench v2 | Arena Hard v2 | IF (Internal; accuracy) |\r\n|-------|---------------|---------------|------------------------|\r\n| Small 3.1 24B Instruct | 55.6% | 19.56% | 82.75% |\r\n| **Small 3.2 24B Instruct** | **65.33%** | **43.1%** | **84.78%** |\r\n\r\n#### Infinite Generations\r\nSmall 3.2 reduces infinite generations by 2x on challenging, long and repetitive prompts.\r\n| Model | Infinite Generations (Internal; Lower is better) |\r\n|-------|-------|\r\n| Small 3.1 24B Instruct | 2.11% |\r\n| **Small 3.2 24B Instruct** | **1.29%** |\r\n\r\n#### STEM\r\n| Model | MMLU | MMLU Pro (5-shot CoT) | MATH | GPQA Main (5-shot CoT) | GPQA Diamond (5-shot CoT) | MBPP Plus - Pass@5 | HumanEval Plus - Pass@5 | SimpleQA (TotalAcc) |\r\n|-------|------|---------------------|------|------------------------|---------------------------|-------------------|-------------------------|-------------------|\r\n| Small 3.1 24B Instruct | 80.62% | 66.76% | 69.30% | 44.42% | 45.96% | 74.63% | 88.99% | 10.43% |\r\n| **Small 3.2 24B Instruct** | 80.50% | **69.06%** | 69.42% | 44.22% | 46.13% | **78.33%** | **92.90%** | **12.10%** |\r\n\r\n### Vision\r\n| Model | MMMU | Mathvista | ChartQA | DocVQA | AI2D |\r\n|-------|------|-----------|---------|--------|------|\r\n| Small 3.1 24B Instruct | **64.00%** | **68.91%** | 86.24% | 94.08% | 93.72% |\r\n| **Small 3.2 24B Instruct** | 62.50% | 67.09% | **87.4%** | 94.86% | 92.91% |\r\n\r\n## Usage\r\nThe model can be used with the following frameworks:\r\n- [`vllm (recommended)`](https://github.com/vllm-project/vllm)\r\n- [`transformers`](https://github.com/huggingface/transformers)\r\n\r\n**Note 1**: We recommend using a relatively low temperature, such as `temperature=0.15`.\r\n**Note 2**: Add a system prompt from [SYSTEM_PROMPT.txt](https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506/blob/main/SYSTEM_PROMPT.txt) for best results.\r\n\r\n### vLLM (recommended)\r\n#### Installation\r\n```\r\npip install vllm --upgrade\r\n```\r\nCheck installation:\r\n```\r\npython -c \"import mistral_common; 
print(mistral_common.__version__)\"\r\n```\r\n#### Serve\r\n```\r\nvllm serve mistralai/Mistral-Small-3.2-24B-Instruct-2506 --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice --limit_mm_per_prompt 'image=10' --tensor-parallel-size 2\r\n```\r\nRequires ~55 GB GPU RAM in bf16/fp16.\r\n\r\n#### Function Calling, Vision Reasoning & Instruction Following\r\nSupports multi-modal reasoning, function/tool calls, and precise instruction following using vLLM API or Transformers. See examples in original README.\r\n\r\n### Transformers\r\nInstall:\r\n```\r\npip install mistral-common --upgrade\r\n```\r\nUse `MistralTokenizer` and `Mistral3ForConditionalGeneration` with the system prompt and optional images for reasoning. Multi-modal inputs and outputs supported. Refer to Python snippets for examples of instruction following, vision reasoning, and function calls.",
"license": "Apache-2.0",
"url": "https://huggingface.co/unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF/resolve/main/Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf",
"memory": 14300000000,
"sha256": "a3cc56310807ed0d145eaf9f018ccda9ae7ad8edb41ec870aa2454b0d4700b3c",
"backend": "llama-cpp",
"properties": {
"jinja": "true"
}
},
{
"id": "hf.openai.gpt-oss-20b",
"name": "openai/gpt-oss-20b (Unsloth quantization)",
"description": "\r\n# Welcome to the gpt-oss series, [OpenAI’s open-weight models](https://openai.com/open-models) designed for powerful reasoning, agentic tasks, and versatile developer use cases.\r\n\r\nWe’re releasing two flavors of the open models:\r\n- `gpt-oss-120b` — for production, general purpose, high reasoning use cases that fits into a single H100 GPU (117B parameters with 5.1B active parameters)\r\n- `gpt-oss-20b` — for lower latency, and local or specialized use cases (21B parameters with 3.6B active parameters)\r\n\r\nBoth models were trained on our [harmony response format](https://github.com/openai/harmony) and should only be used with the harmony format as it will not work correctly otherwise.\r\n\r\n> [!NOTE]\r\n> This model card is dedicated to the smaller `gpt-oss-20b` model. Check out [`gpt-oss-120b`](https://huggingface.co/openai/gpt-oss-120b) for the larger model.\r\n\r\n# Highlights\r\n\r\n* **Permissive Apache 2.0 license:** Build freely without copyleft restrictions or patent risk—ideal for experimentation, customization, and commercial deployment.\r\n* **Configurable reasoning effort:** Easily adjust the reasoning effort (low, medium, high) based on your specific use case and latency needs.\r\n* **Full chain-of-thought:** Gain complete access to the model’s reasoning process, facilitating easier debugging and increased trust in outputs. It’s not intended to be shown to end users.\r\n* **Fine-tunable:** Fully customize models to your specific use case through parameter fine-tuning.\r\n* **Agentic capabilities:** Use the models’ native capabilities for function calling, [web browsing](https://github.com/openai/gpt-oss/tree/main?tab=readme-ov-file#browser), [Python code execution](https://github.com/openai/gpt-oss/tree/main?tab=readme-ov-file#python), and Structured Outputs.\r\n* **Native MXFP4 quantization:** The models are trained with native MXFP4 precision for the MoE layer, making `gpt-oss-120b` run on a single H100 GPU and the `gpt-oss-20b` model run within 16GB of memory.\r\n\r\n---\r\n\r\n# Inference examples\r\n\r\n## Transformers\r\nYou can use `gpt-oss-120b` and `gpt-oss-20b` with Transformers. If you use the Transformers chat template, it will automatically apply the [harmony response format](https://github.com/openai/harmony). If you use `model.generate` directly, you need to apply the harmony format manually using the chat template or use our [openai-harmony](https://github.com/openai/harmony) package.\r\n\r\nTo get started, install the necessary dependencies:\r\n```\r\npip install -U transformers kernels torch \r\n```\r\n\r\n```py\r\nfrom transformers import pipeline\r\nimport torch\r\n\r\nmodel_id = \"openai/gpt-oss-20b\"\r\n\r\npipe = pipeline(\r\n \"text-generation\",\r\n model=model_id,\r\n torch_dtype=\"auto\",\r\n device_map=\"auto\",\r\n)\r\n\r\nmessages = [\r\n {\"role\": \"user\", \"content\": \"Explain quantum mechanics clearly and concisely.\"},\r\n]\r\n\r\noutputs = pipe(\r\n messages,\r\n max_new_tokens=256,\r\n)\r\nprint(outputs[0][\"generated_text\"][-1])\r\n```\r\n\r\n## vLLM\r\nvLLM recommends using [uv](https://docs.astral.sh/uv/) for Python dependency management. 
You can spin up an OpenAI-compatible webserver:\r\n```\r\nuv pip install --pre vllm==0.10.1+gptoss \\\r\n --extra-index-url https://wheels.vllm.ai/gpt-oss/ \\\r\n --extra-index-url https://download.pytorch.org/whl/nightly/cu128 \\\r\n --index-strategy unsafe-best-match\r\n\r\nvllm serve openai/gpt-oss-20b\r\n```\r\n\r\n## PyTorch / Triton\r\nSee [reference implementations](https://github.com/openai/gpt-oss?tab=readme-ov-file#reference-pytorch-implementation).\r\n\r\n## Ollama\r\n```bash\r\n# gpt-oss-20b\r\nollama pull gpt-oss:20b\r\nollama run gpt-oss:20b\r\n```\r\n\r\n## LM Studio\r\n```bash\r\n# gpt-oss-20b\r\nlms get openai/gpt-oss-20b\r\n```\r\n\r\n# Download the model\r\n```bash\r\n# gpt-oss-20b\r\nhuggingface-cli download openai/gpt-oss-20b --include \"original/*\" --local-dir gpt-oss-20b/\npip install gpt-oss\npython -m gpt_oss.chat model/\r\n```\r\n\r\n# Reasoning levels\r\n* **Low:** Fast responses for general dialogue.\r\n* **Medium:** Balanced speed and detail.\r\n* **High:** Deep and detailed analysis.\r\n\r\n# Tool use\r\n* Web browsing (built-in tools)\r\n* Function calling with schemas\r\n* Agentic operations\r\n\r\n# Fine-tuning\r\nThe smaller model `gpt-oss-20b` can be fine-tuned on consumer hardware, larger `gpt-oss-120b` can be fine-tuned on a single H100 node.",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-Q4_K_M.gguf",
"memory": 11600000000,
"sha256": "c27536640e410032865dc68781d80a08b98f8db5e93575919af8ccc0568aeb4f",
"backend": "llama-cpp"
},
{
"id": "hf.qwen.qwen3-4b-GGUF",
"name": "qwen/qwen3-4b-GGUF",
"description": "\r\n# Qwen3-4B-GGUF\r\n\r\n
\r\n<\/a>\r\n\r\n## Qwen3 Highlights\r\n\r\nQwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features:\r\n\r\n- **Uniquely support of seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within single model**, ensuring optimal performance across various scenarios.\r\n- **Significantly enhancement in its reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.\r\n- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.\r\n- **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks.\r\n- **Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction following** and **translation**.\r\n\r\n\r\n## Model Overview\r\n\r\n**Qwen3-4B** has the following features:\r\n- Type: Causal Language Models\r\n- Training Stage: Pretraining & Post-training\r\n- Number of Parameters: 4.0B\r\n- Number of Paramaters (Non-Embedding): 3.6B\r\n- Number of Layers: 36\r\n- Number of Attention Heads (GQA): 32 for Q and 8 for KV\r\n- Context Length: 32,768 natively and [131,072 tokens with YaRN](#processing-long-texts).\r\n\r\n- Quantization: q4_K_M, q5_0, q5_K_M, q6_K, q8_0\r\n\r\nFor more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https:\/\/qwenlm.github.io\/blog\/qwen3\/), [GitHub](https:\/\/github.com\/QwenLM\/Qwen3), and [Documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/).\r\n\r\n## Quickstart\r\n\r\n### llama.cpp\r\n\r\nCheck out our [llama.cpp documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/run_locally\/llama.cpp.html) for more usage guide.\r\n\r\nWe advise you to clone [`llama.cpp`](https:\/\/github.com\/ggerganov\/llama.cpp) and install it following the official guide. We follow the latest version of llama.cpp. \r\nIn the following demonstration, we assume that you are running commands under the repository `llama.cpp`.\r\n\r\n```shell\r\n.\/llama-cli -hf Qwen\/Qwen3-4B-GGUF:Q8_0 --jinja --color -ngl 99 -fa -sm row --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --presence-penalty 1.5 -c 40960 -n 32768 --no-context-shift\r\n```\r\n\r\n### ollama\r\n\r\nCheck out our [ollama documentation](https:\/\/qwen.readthedocs.io\/en\/latest\/run_locally\/ollama.html) for more usage guide.\r\n\r\nYou can run Qwen3 with one command:\r\n\r\n```shell\r\nollama run hf.co\/Qwen\/Qwen3-4B-GGUF:Q8_0\r\n```\r\n\r\n## Switching Between Thinking and Non-Thinking Mode\r\n\r\nYou can add `\/think` and `\/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. 
The model will follow the most recent instruction in multi-turn conversations.\r\n\r\nHere is an example of multi-turn conversation:\r\n\r\n```\r\n> Who are you \/no_think\r\n\r\n\r\n\r\n<\/think>\r\n\r\nI am Qwen, a large-scale language model developed by Alibaba Cloud. [...]\r\n\r\n> How many 'r's are in 'strawberries'? \/think\r\n\r\n\r\nOkay, let's see. The user is asking how many times the letter 'r' appears in the word \"strawberries\". [...]\r\n<\/think>\r\n\r\nThe word strawberries contains 3 instances of the letter r. [...]\r\n```\r\n\r\n\r\n## Processing Long Texts\r\n\r\nQwen3 natively supports context lengths of up to 32,768 tokens. For conversations where the total length (including both input and output) significantly exceeds this limit, we recommend using RoPE scaling techniques to handle long texts effectively. We have validated the model's performance on context lengths of up to 131,072 tokens using the [YaRN](https:\/\/arxiv.org\/abs\/2309.00071) method.\r\n\r\nTo enable YARN in ``llama.cpp``:\r\n\r\n```shell\r\n.\/llama-cli ... -c 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768\r\n```\r\n\r\n> [!NOTE]\r\n> All the notable open-source frameworks implement static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.**\r\n> We advise adding the `rope_scaling` configuration only when processing long contexts is required. \r\n> It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 65,536 tokens, it would be better to set `factor` as 2.0. \r\n\r\n> [!TIP]\r\n> The endpoint provided by Alibaba Model Studio supports dynamic YaRN by default and no extra configuration is needed.\r\n\r\n\r\n## Best Practices\r\n\r\nTo achieve optimal performance, we recommend the following settings:\r\n\r\n1. **Sampling Parameters**:\r\n - For thinking mode (`enable_thinking=True`), use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, `MinP=0`, and `PresencePenalty=1.5`. **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions.\r\n - For non-thinking mode (`enable_thinking=False`), we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, `MinP=0`, and `PresencePenalty=1.5`.\r\n - **We recommend setting `presence_penalty` to 1.5 for quantized models to suppress repetitive outputs.** You can adjust the `presence_penalty` parameter between 0 and 2. A higher value may occasionally lead to language mixing and a slight reduction in model performance. \r\n\r\n2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 38,912 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.\r\n\r\n3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\r\n - **Math Problems**: Include \"Please reason step by step, and put your final answer within \\boxed{}.\" in the prompt.\r\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: \"Please show your choice in the `answer` field with only the choice letter, e.g., `\"answer\": \"C\"`.\"\r\n\r\n4. 
**No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.\r\n\r\n### Citation\r\n\r\nIf you find our work helpful, feel free to give us a cite.\r\n\r\n```\r\n@misc{qwen3technicalreport,\r\n title={Qwen3 Technical Report}, \r\n author={Qwen Team},\r\n year={2025},\r\n eprint={2505.09388},\r\n archivePrefix={arXiv},\r\n primaryClass={cs.CL},\r\n url={https:\/\/arxiv.org\/abs\/2505.09388}, \r\n}\r\n```",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/Qwen/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf",
"sha256": "7485fe6f11af29433bc51cab58009521f205840f5b4ae3a32fa7f92e8534fdf5",
"backend": "llama-cpp"
},
{
"id": "hf.unsloth.qwen3-4b-thinking-GGUF",
"name": "qwen/Qwen3-4B-Thinking-2507-GGUF (Unsloth quantization)",
"description": "---\nlibrary_name: transformers\nlicense: apache-2.0\nlicense_link: https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507/blob/main/LICENSE\nbase_model:\n- Qwen/Qwen3-4B-Thinking-2507\ntags:\n- qwen\n- qwen3\n- unsloth\n---\n\n
\nSee our collection for all versions of Qwen3 including GGUF, 4-bit & 16-bit formats.\nLearn to run Qwen3-2507 correctly - Read our Guide.\nUnsloth Dynamic 2.0 achieves superior accuracy & outperforms other leading quants.\n✨ Read our Qwen3-2507 Guide here!\n
\n\n- Fine-tune Qwen3 (14B) for free using our Google [Colab notebook here](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n- Read our Blog about Qwen3 support: [unsloth.ai/blog/qwen3](https://unsloth.ai/blog/qwen3)\n- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).\n- Run & export your fine-tuned model to Ollama, llama.cpp or HF.\n\n| Unsloth supports | Free Notebooks | Performance | Memory use |\n|-----------------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------|\n| **Qwen3 (14B)** | [▶\uFE0F Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 70% less |\n| **GRPO with Qwen3 (8B)** | [▶\uFE0F Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 80% less |\n| **Llama-3.2 (3B)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |\n| **Llama-3.2 (11B vision)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |\n| **Qwen2.5 (7B)** | [▶\uFE0F Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |\n\n# Qwen3-4B-Thinking-2507\n\n
\n\n\n## Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of Qwen3-4B, improving both the **quality and depth** of reasoning. We are pleased to introduce **Qwen3-4B-Thinking-2507**, featuring the following key enhancements:\n\n- **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, and academic benchmarks that typically require human expertise.\n- **Markedly better general capabilities**, such as instruction following, tool usage, text generation, and alignment with human preferences.\n- **Enhanced 256K long-context understanding** capabilities.\n\n**NOTE**: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n\n\n## Model Overview\n\n**Qwen3-4B-Thinking-2507** has the following features:\n- Type: Causal Language Models\n- Training Stage: Pretraining & Post-training\n- Number of Parameters: 4.0B\n- Number of Paramaters (Non-Embedding): 3.6B\n- Number of Layers: 36\n- Number of Attention Heads (GQA): 32 for Q and 8 for KV\n- Context Length: **262,144 natively**. \n\n**NOTE: This model supports only thinking mode. Meanwhile, specifying `enable_thinking=True` is no longer required.**\n\nAdditionally, to enforce model thinking, the default chat template automatically includes ``. Therefore, it is normal for the model's output to contain only `` without an explicit opening `` tag.\n\nFor more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).\n\n\n## Performance\n\n\n| | Qwen3-30B-A3B Thinking | Qwen3-4B Thinking | Qwen3-4B-Thinking-2507 |\n|--- | --- | --- | --- |\n| **Knowledge** | | |\n| MMLU-Pro | **78.5** | 70.4 | 74.0 |\n| MMLU-Redux | **89.5** | 83.7 | 86.1 |\n| GPQA | **65.8** | 55.9 | **65.8** |\n| SuperGPQA | **51.8** | 42.7 | 47.8 |\n| **Reasoning** | | |\n| AIME25 | 70.9 | 65.6 | **81.3** |\n| HMMT25 | 49.8 | 42.1 | **55.5** |\n| LiveBench 20241125 | **74.3** | 63.6 | 71.8 |\n| **Coding** | | |\n| LiveCodeBench v6 (25.02-25.05) | **57.4** | 48.4 | 55.2 |\n| CFEval | **1940** | 1671 | 1852 |\n| OJBench | **20.7** | 16.1 | 17.9 |\n| **Alignment** | | |\n| IFEval | 86.5 | 81.9 | **87.4** |\n| Arena-Hard v2$ | **36.3** | 13.7 | 34.9 |\n| Creative Writing v3 | **79.1** | 61.1 | 75.6 |\n| WritingBench | 77.0 | 73.5 | **83.3** |\n| **Agent** | | |\n| BFCL-v3 | 69.1 | 65.9 | **71.2** |\n| TAU1-Retail | 61.7 | 33.9 | **66.1** |\n| TAU1-Airline | 32.0 | 32.0 | **48.0** |\n| TAU2-Retail | 34.2 | 38.6 | **53.5** |\n| TAU2-Airline | 36.0 | 28.0 | **58.0** |\n| TAU2-Telecom | 22.8 | 17.5 | **27.2** |\n| **Multilingualism** | | |\n| MultiIF | 72.2 | 66.3 | **77.3** |\n| MMLU-ProX | **73.1** | 61.0 | 64.2 |\n| INCLUDE | **71.9** | 61.8 | 64.4 |\n| PolyMATH | 46.1 | 40.0 | **46.2** |\n\n$ For reproducibility, we report the win rates evaluated by GPT-4.1.\n\n\\& For highly challenging tasks (including PolyMATH and all reasoning and coding tasks), we use an output length of 81,920 tokens. 
For all other tasks, we set the output length to 32,768.\n\n## Quickstart\n\nThe code of Qwen3 has been in the latest Hugging Face `transformers` and we advise you to use the latest version of `transformers`.\n\nWith `transformers<4.51.0`, you will encounter the following error:\n```\nKeyError: 'qwen3'\n```\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs. \n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = \"Qwen/Qwen3-4B-Thinking-2507\"\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n torch_dtype=\"auto\",\n device_map=\"auto\"\n)\n\n# prepare the model input\nprompt = \"Give me a short introduction to large language model.\"\nmessages = [\n {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True,\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=32768\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() \n\n# parsing thinking content\ntry:\n # rindex finding 151668 ()\n index = len(output_ids) - output_ids[::-1].index(151668)\nexcept ValueError:\n index = 0\n\nthinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip(\"\\n\")\ncontent = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip(\"\\n\")\n\nprint(\"thinking content:\", thinking_content) # no opening tag\nprint(\"content:\", content)\n\n```\n\nFor deployment, you can use `sglang>=0.4.6.post1` or `vllm>=0.8.5` or to create an OpenAI-compatible API endpoint:\n- SGLang:\n ```shell\n python -m sglang.launch_server --model-path Qwen/Qwen3-4B-Thinking-2507 --context-length 262144 --reasoning-parser deepseek-r1\n ```\n- vLLM:\n ```shell\n vllm serve Qwen/Qwen3-4B-Thinking-2507 --max-model-len 262144 --enable-reasoning --reasoning-parser deepseek_r1\n ```\n\n**Note: If you encounter out-of-memory (OOM) issues, you may consider reducing the context length to a smaller value. However, since the model may require longer token sequences for reasoning, we strongly recommend using a context length greater than 131,072 when possible.**\n\nFor local use, applications such as Ollama, LMStudio, MLX-LM, llama.cpp, and KTransformers have also supported Qwen3.\n\n## Agentic Use\n\nQwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.\n\nTo define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.\n```python\nfrom qwen_agent.agents import Assistant\n\n# Define LLM\n# Using OpenAI-compatible API endpoint. It is recommended to disable the reasoning and the tool call parsing\n# functionality of the deployment frameworks and let Qwen-Agent automate the related operations. 
For example, \n# `VLLM_USE_MODELSCOPE=true vllm serve Qwen/Qwen3-4B-Thinking-2507 --served-model-name Qwen3-4B-Thinking-2507 --max-model-len 262144`.\nllm_cfg = {\n 'model': 'Qwen3-4B-Thinking-2507',\n\n # Use a custom endpoint compatible with OpenAI API:\n 'model_server': 'http://localhost:8000/v1', # api_base without reasoning and tool call parsing\n 'api_key': 'EMPTY',\n 'generate_cfg': {\n 'thought_in_content': True,\n },\n}\n\n# Define Tools\ntools = [\n {'mcpServers': { # You can specify the MCP configuration file\n 'time': {\n 'command': 'uvx',\n 'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']\n },\n \"fetch\": {\n \"command\": \"uvx\",\n \"args\": [\"mcp-server-fetch\"]\n }\n }\n },\n 'code_interpreter', # Built-in tools\n]\n\n# Define Agent\nbot = Assistant(llm=llm_cfg, function_list=tools)\n\n# Streaming generation\nmessages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]\nfor responses in bot.run(messages=messages):\n pass\nprint(responses)\n```\n\n## Best Practices\n\nTo achieve optimal performance, we recommend the following settings:\n\n1. **Sampling Parameters**:\n - We suggest using `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`.\n - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.\n\n2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 81,920 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.\n\n3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\n - **Math Problems**: Include \"Please reason step by step, and put your final answer within \\boxed{}.\" in the prompt.\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: \"Please show your choice in the `answer` field with only the choice letter, e.g., `\"answer\": \"C\"`.\"\n\n4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.\n\n\n### Citation\n\nIf you find our work helpful, feel free to give us a cite.\n\n```\n@misc{qwen3technicalreport,\n title={Qwen3 Technical Report}, \n author={Qwen Team},\n year={2025},\n eprint={2505.09388},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2505.09388}, \n}\n```",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/unsloth/Qwen3-4B-Thinking-2507-GGUF/resolve/main/Qwen3-4B-Thinking-2507-Q4_K_M.gguf",
"backend": "llama-cpp",
"sha256": "ddd52e18200baab281c5c46f70d544ce4d4fe4846eab1608f2fff48a64554212",
"properties": {
"jinja": "true"
}
},
{
"id": "hf.ibm-granite.granite-4.0-tiny-GGUF",
"name": "ibm-granite/granite-4.0-tiny-GGUF",
"description": "# Granite-4.0-H-Tiny\n\n**Model Summary:**\nGranite-4.0-H-Tiny is a 7B parameter long-context instruct model finetuned from *Granite-4.0-H-Tiny-Base* using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved *instruction following (IF)* and *tool-calling* capabilities, making them more effective in enterprise applications.\n\n- **Developers:** Granite Team, IBM\n- **HF Collection:** [Granite 4.0 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-40-language-models-6811a18b820ef362d9e5a82c)\n- **GitHub Repository:** [ibm-granite/granite-4.0-language-models](https://github.com/ibm-granite/granite-4.0-language-models)\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) \n- **Release Date**: October 2nd, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 4.0 models for languages beyond these languages.\n\n**Intended use:** \nThe model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, including business applications.\n\n*Capabilities*\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Fill-In-the-Middle (FIM) code completions\n\n\n \n**Generation:** \nThis is a simple example of how to use Granite-4.0-H-Tiny model.\n\nInstall the following libraries:\n\n```shell\npip install torch torchvision torchaudio\npip install accelerate\npip install transformers\n```\nThen, copy the snippet from the section that is relevant for your use case.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\ndevice = \"cuda\"\nmodel_path = \"ibm-granite/granite-4.0-h-tiny\"\ntokenizer = AutoTokenizer.from_pretrained(model_path)\n# drop device_map if running on CPU\nmodel = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)\nmodel.eval()\n# change input text as desired\nchat = [\n { \"role\": \"user\", \"content\": \"Please list one IBM Research laboratory located in the United States. You should only output its name and location.\" },\n]\nchat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)\n# tokenize the text\ninput_tokens = tokenizer(chat, return_tensors=\"pt\").to(device)\n# generate output tokens\noutput = model.generate(**input_tokens, \n max_new_tokens=100)\n# decode output tokens into text\noutput = tokenizer.batch_decode(output)\n# print output\nprint(output[0])\n```\n\nExpected output:\n```shell\n<|start_of_role|>user<|end_of_role|>Please list one IBM Research laboratory located in the United States. You should only output its name and location.<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Almaden Research Center, San Jose, California<|end_of_text|>\n```\n\n**Tool-calling:** \nGranite-4.0-H-Tiny comes with enhanced tool calling capabilities, enabling seamless integration with external functions and APIs. 
To define a list of tools please follow OpenAI's function [definition schema](https://platform.openai.com/docs/guides/function-calling?api-mode=responses#defining-functions). \n\nThis is an example of how to use Granite-4.0-H-Tiny model tool-calling ability:\n\n```python\ntools = [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"description\": \"Get the current weather for a specified city.\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"city\": {\n \"type\": \"string\",\n \"description\": \"Name of the city\"\n }\n },\n \"required\": [\"city\"]\n }\n }\n }\n]\n\n# change input text as desired\nchat = [\n { \"role\": \"user\", \"content\": \"What's the weather like in Boston right now?\" },\n]\nchat = tokenizer.apply_chat_template(chat, \\\n tokenize=False, \\\n tools=tools, \\\n add_generation_prompt=True)\n# tokenize the text\ninput_tokens = tokenizer(chat, return_tensors=\"pt\").to(device)\n# generate output tokens\noutput = model.generate(**input_tokens, \n max_new_tokens=100)\n# decode output tokens into text\noutput = tokenizer.batch_decode(output)\n# print output\nprint(output[0])\n```\n\nExpected output:\n```shell\n<|start_of_role|>system<|end_of_role|>You are a helpful assistant with access to the following tools. You may call one or more tools to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{\"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"description\": \"Get the current weather for a specified city.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"string\", \"description\": \"Name of the city\"}}, \"required\": [\"city\"]}}}\n\n\nFor each tool call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>What's the weather like in Boston right now?<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n{\"name\": \"get_current_weather\", \"arguments\": {\"city\": \"Boston\"}}\n<|end_of_text|>\n```\n\n\n\n**Evaluation Results:** \n\n\n\n\n \n | Benchmarks | \n Metric | \n Micro Dense | \n H Micro Dense | \n H Tiny MoE | \n H Small MoE | \n
\n\n| Benchmarks | Metric | Micro Dense | H Micro Dense | H Tiny MoE | H Small MoE |\n|---|---|---|---|---|---|\n| **General Tasks** | | | | | |\n| MMLU | 5-shot | 65.98 | 67.43 | 68.65 | 78.44 |\n| MMLU-Pro | 5-shot, CoT | 44.5 | 43.48 | 44.94 | 55.47 |\n| BBH | 3-shot, CoT | 72.48 | 69.36 | 66.34 | 81.62 |\n| AGI EVAL | 0-shot, CoT | 64.29 | 59 | 62.15 | 70.63 |\n| GPQA | 0-shot, CoT | 30.14 | 32.15 | 32.59 | 40.63 |\n| **Alignment Tasks** | | | | | |\n| AlpacaEval 2.0 | | 29.49 | 31.49 | 30.61 | 42.48 |\n| IFEval | Instruct, Strict | 85.5 | 86.94 | 84.78 | 89.87 |\n| IFEval | Prompt, Strict | 79.12 | 81.71 | 78.1 | 85.22 |\n| IFEval | Average | 82.31 | 84.32 | 81.44 | 87.55 |\n| ArenaHard | | 25.84 | 36.15 | 35.75 | 46.48 |\n| **Math Tasks** | | | | | |\n| GSM8K | 8-shot | 85.45 | 81.35 | 84.69 | 87.27 |\n| GSM8K Symbolic | 8-shot | 79.82 | 77.5 | 81.1 | 87.38 |\n| Minerva Math | 0-shot, CoT | 62.06 | 66.44 | 69.64 | 74 |\n| DeepMind Math | 0-shot, CoT | 44.56 | 43.83 | 49.92 | 59.33 |\n| **Code Tasks** | | | | | |\n| HumanEval | pass@1 | 80 | 81 | 83 | 88 |\n| HumanEval+ | pass@1 | 72 | 75 | 76 | 83 |\n| MBPP | pass@1 | 72 | 73 | 80 | 84 |\n| MBPP+ | pass@1 | 64 | 64 | 69 | 71 |\n| CRUXEval-O | pass@1 | 41.5 | 41.25 | 39.63 | 50.25 |\n| BigCodeBench | pass@1 | 39.21 | 37.9 | 41.06 | 46.23 |\n| **Tool Calling Tasks** | | | | | |\n| BFCL v3 | | 59.98 | 57.56 | 57.65 | 64.69 |\n| **Multilingual Tasks** | | | | | |\n| MULTIPLE | pass@1 | 49.21 | 49.46 | 55.83 | 57.37 |\n| MMMLU | 5-shot | 55.14 | 55.19 | 61.87 | 69.69 |\n| INCLUDE | 5-shot | 51.62 | 50.51 | 53.12 | 63.97 |\n| MGSM | 8-shot | 28.56 | 44.48 | 45.36 | 38.72 |\n| **Safety** | | | | | |\n| SALAD-Bench | | 97.06 | 96.28 | 97.77 | 97.3 |\n| AttaQ | | 86.05 | 84.44 | 86.61 | 86.64 |\n
\n\nMultilingual Benchmarks and the included languages:\n\n| Benchmarks | # Langs | Languages |\n|---|---|---|\n| MMMLU | 11 | ar, de, en, es, fr, ja, ko, pt, zh, bn, hi |\n| INCLUDE | 14 | hi, bn, ta, te, ar, de, es, fr, it, ja, ko, nl, pt, zh |\n| MGSM | 5 | en, es, fr, ja, zh |\n
\n\n**Model Architecture:** \nGranite-4.0-H-Tiny baseline is built on a decoder-only MoE transformer architecture. Core components of this architecture are: GQA, Mamba2, MoEs with shared experts, SwiGLU activation, RMSNorm, and shared input/output embeddings.\n\n| Model | Micro Dense | H Micro Dense | H Tiny MoE | H Small MoE |\n|---|---|---|---|---|\n| Embedding size | 2560 | 2048 | 1536 | 4096 |\n| Number of layers | 40 attention | 4 attention / 36 Mamba2 | 4 attention / 36 Mamba2 | 4 attention / 36 Mamba2 |\n| Attention head size | 64 | 64 | 128 | 128 |\n| Number of attention heads | 40 | 32 | 12 | 32 |\n| Number of KV heads | 8 | 8 | 4 | 8 |\n| Mamba2 state size | - | 128 | 128 | 128 |\n| Number of Mamba2 heads | - | 64 | 48 | 128 |\n| MLP / Shared expert hidden size | 8192 | 8192 | 1024 | 1536 |\n| Num. Experts | - | - | 64 | 72 |\n| Num. active Experts | - | - | 6 | 10 |\n| Expert hidden size | - | - | 512 | 768 |\n| MLP activation | SwiGLU | SwiGLU | SwiGLU | SwiGLU |\n| Sequence length | 128K | 128K | 128K | 128K |\n| Position embedding | RoPE | NoPE | NoPE | NoPE |\n| # Parameters | 3B | 3B | 7B | 32B |\n| # Active parameters | 3B | 3B | 1B | 9B |\n
\n\n**Training Data:** \nOverall, our SFT data is largely comprised of three key sources: (1) publicly available datasets with permissive license, (2) internal synthetic data targeting specific capabilities, and (3) a select set of human-curated data.\n\n**Infrastructure:**\nWe trained the Granite 4.0 Language Models utilizing an NVIDIA GB200 NVL72 cluster hosted in CoreWeave. Intra-rack communication occurs via the 72-GPU NVLink domain, and a non-blocking, full Fat-Tree NDR 400 Gb/s InfiniBand network provides inter-rack communication. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.\n\n**Ethical Considerations and Limitations:** \nGranite 4.0 Instruction Models are primarily finetuned using instruction-response pairs mostly in English, but also multilingual data covering multiple languages. Although this model can handle multilingual dialog use cases, its performance might not be similar to English tasks. In such case, introducing a small number of examples (few-shot) can help the model in generating more accurate outputs. While this model has been aligned by keeping safety in consideration, the model may in some cases produce inaccurate, biased, or unsafe responses to user prompts. So we urge the community to use this model with proper safety testing and tuning tailored for their specific tasks.\n\n**Resources**\n- ⭐\uFE0F Learn about the latest updates with Granite: https://www.ibm.com/granite\n- \uD83D\uDCC4 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/\n- \uD83D\uDCA1 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources\n\n",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm-granite/granite-4.0-h-tiny-GGUF/resolve/3971ea11968c34d4e4dbee55cfb55b9cba134b21/granite-4.0-h-tiny-Q4_K_M.gguf",
"memory": 4224733676,
"properties": {
"jinja": "true"
},
"sha256": "491ba81786c46a345a5da9a60cdb9f9a3056960c8411dd857153c194b1f91313",
"backend": "llama-cpp"
},
{
"id": "hf.ibm-granite.granite-4.0-micro-GGUF",
"name": "ibm-granite/granite-4.0-micro-GGUF",
"description": "# Granite-4.0-Micro\n\n**Model Summary:**\nGranite-4.0-Micro is a compact language model from the Granite 4.0 family designed for efficient deployment with strong performance. This Q4_K_M quantized GGUF version provides a good balance between model size and quality, making it suitable for resource-constrained environments while maintaining the core capabilities of the Granite 4.0 series.\n\n- **Developers:** Granite Team, IBM\n- **HF Collection:** [Granite 4.0 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-40-language-models-6811a18b820ef362d9e5a82c)\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Capabilities:**\n* General instruction following\n* Question-answering\n* Text generation\n* Conversational AI\n* Multilingual dialog use cases\n\n**Intended Use:**\nThe model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, particularly in scenarios where model size and inference speed are important considerations.",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm-granite/granite-4.0-micro-GGUF/resolve/397e2dcbd97dcdfa016934bffed65cf5df3ca55f/granite-4.0-micro-Q4_K_M.gguf",
"memory": 2100000000,
"properties": {
"jinja": "true"
},
"sha256": "6c02683809a8dc4eb05c78d44bc63bcd707703b078998fa58829c858ab337bb0",
"backend": "llama-cpp"
},
{
"id": "hf.ibm-granite.granite-3.3-8b-instruct-GGUF",
"name": "ibm-granite/granite-3.3-8b-instruct-GGUF",
"description": "# Granite-3.3-8B-Instruct\n\n**Model Summary:**\nGranite-3.3-8B-Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. Built on top of Granite-3.3-8B-Base, the model delivers significant gains on benchmarks for measuring generic performance including AlpacaEval-2.0 and Arena-Hard, and improvements in mathematics, coding, and instruction following. It supprts structured reasoning through \\\\<\\/think\\> and \\\\<\\/response\\> tags, providing clear separation between internal thoughts and final outputs. The model has been trained on a carefully balanced combination of permissively licensed data and curated synthetic tasks.\n\n- **Developers:** Granite Team, IBM\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)\n- **Release Date**: April 16th, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. However, users may finetune this Granite model for languages beyond these 12 languages.\n\n**Intended Use:** \nThis model is designed to handle general instruction-following tasks and can be integrated into AI assistants across various domains, including business applications.\n\n**Capabilities**\n* Thinking\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Fill-in-the-middle\n* Long-context tasks including long document/meeting summarization, long document QA, etc.\n\n\n**Generation:** \nThis is a simple example of how to use Granite-3.3-8B-Instruct model.\n\nInstall the following libraries:\n\n```shell\npip install torch torchvision torchaudio\npip install accelerate\npip install transformers\n```\nThen, copy the snippet from the section that is relevant for your use case.\n\n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, set_seed\nimport torch\n\nmodel_path=\"ibm-granite/granite-3.3-8b-instruct\"\ndevice=\"cuda\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_path,\n device_map=device,\n torch_dtype=torch.bfloat16,\n )\ntokenizer = AutoTokenizer.from_pretrained(\n model_path\n)\n\nconv = [{\"role\": \"user\", \"content\":\"Redesign a common household item to make it more sustainable and user-friendly. Explain the changes and their benefits.\"}]\n\ninput_ids = tokenizer.apply_chat_template(conv, return_tensors=\"pt\", thinking=True, return_dict=True, add_generation_prompt=True).to(device)\n\nset_seed(42)\noutput = model.generate(\n **input_ids,\n max_new_tokens=8192,\n)\n\nprediction = tokenizer.decode(output[0, input_ids[\"input_ids\"].shape[1]:], skip_special_tokens=True)\nprint(prediction)\n```\n\n**Example Outputs**\n- thinking=True\n```md\nTo address this prompt, I'll focus on redesigning a common household item: the plastic water bottle. The goal is to make it more sustainable and user-friendly. Here are the proposed changes and their benefits:\n\n1. **Material**: Replace plastic with biodegradable or recyclable materials like glass or plant-based plastics (PLA).\n2. **Design**: Introduce a modular design for easy refilling and cleaning.\n3. **Functionality**: Incorporate a built-in water filter for purification.\n4. **Durability**: Enhance the bottle's lifespan through robust construction.\n5. 
**User-friendliness**: Simplify the cap mechanism for easy opening and closing.\n\nThese changes would significantly reduce plastic waste, promote recycling, ensure access to clean drinking water, and enhance the overall user experience.\n\nNow, I will detail each change and its benefits in a structured format.\n\n### Redesign of the Plastic Water Bottle for Sustainability and User-Friendliness\n\n#### 1. Material: Biodegradable or Recyclable Alternatives\n\n**Change**: Transition from traditional PET plastic to glass or plant-based plastics (Polylactic Acid - PLA).\n\n**Benefits**:\n- **Sustainability**: Glass and PLA are biodegradable or highly recyclable, drastically reducing plastic pollution.\n- **Health**: Unlike PET, these materials do not leach chemicals into the water, ensuring safer consumption.\n\n#### 2. Modular Design for Easy Refilling and Cleaning\n\n**Change**: Implement a detachable, interchangeable inner liner within a reusable outer casing.\n\n**Benefits**:\n- **User-Friendliness**: Users can easily remove the inner liner for thorough cleaning, reducing the risk of bacterial growth.\n- **Sustainability**: The outer casing remains intact, encouraging reuse, while the inner liner can be replaced or recycled when worn out.\n\n#### 3. Built-in Water Filter\n\n**Change**: Integrate a removable, replaceable water filter into the bottle cap.\n\n**Benefits**:\n- **Accessibility**: Provides clean drinking water directly from any source (tap, river, etc.), eliminating the need for additional filtration devices.\n- **Health**: Removes contaminants, bacteria, and impurities, ensuring safe hydration.\n- **Sustainability**: Reduces the use of single-use plastic water bottles by offering a purification solution within a reusable container.\n\n#### 4. Enhanced Durability\n\n**Change**: Use high-quality, durable materials and robust construction techniques for both the outer casing and inner liner.\n\n**Benefits**:\n- **Longevity**: A more robust design increases the bottle's lifespan, reducing the frequency of replacements.\n- **Cost-effectiveness**: Although the initial cost might be higher, the long-term savings from reduced replacements make it economical.\n- **Sustainability**: Less frequent disposal means lower environmental impact.\n\n#### 5. Simplified Cap Mechanism\n\n**Change**: Adopt a one-touch, easy-to-open and close cap design, possibly inspired by sports bottle caps.\n\n**Benefits**:\n- **User-Friendliness**: Especially beneficial for children, elderly, or those with limited hand dexterity.\n- **Convenience**: Quick access to water encourages more frequent hydration, promoting healthier habits.\n\n### Conclusion\n\nThis redesigned water bottle addresses key sustainability concerns by minimizing plastic use and waste, while also enhancing user-friendliness through improved functionality, durability, and accessibility. By integrating biodegradable materials, a modular design, a built-in filter, robust construction, and a simple cap mechanism, the bottle not only reduces environmental footprint but also offers a superior user experience.\n```\n- thinking=False\n```md\nItem for Redesign: Plastic Bottle (e.g., water or beverage bottle)\n\nRedesign Concept: Eco-Friendly, Modular, and Multi-Functional Reusable Bottle\n\nChanges:\n\n1. Material: Replace the conventional plastic with food-grade, BPA-free, and recyclable materials such as stainless steel or glass. These materials are durable, non-toxic, and have a longer lifespan, reducing waste.\n\n2. 
Modular Design: Introduce a modular design that allows users to customize their bottle according to their needs. This could include interchangeable caps, mouthpiece sizes, and carrying loops. Users can purchase additional modules as needed, promoting a more sustainable approach by reducing the need to buy an entirely new bottle for different purposes.\n\n3. Integrated Filter: Incorporate a built-in, washable, and reusable filter that can remove impurities and improve the taste of water. This eliminates the need for single-use disposable filters or bottled water, further reducing plastic waste.\n\n4. Smart Cap: Develop a smart cap with a built-in digital display and temperature sensor. This feature allows users to track their daily water intake, set hydration goals, and monitor the temperature of their beverage. The smart cap can be synced with a mobile app for additional functionality, such as reminders and progress tracking.\n\n5. Easy-to-Clean Design: Ensure the bottle has a wide mouth and smooth interior surfaces for easy cleaning. Include a brush for hard-to-reach areas, making maintenance simple and encouraging regular use.\n\n6. Collapsible Structure: Implement a collapsible design that reduces the bottle's volume when not in use, making it more portable and convenient for storage.\n\nBenefits:\n\n1. Sustainability: By using recyclable materials and reducing plastic waste, this redesigned bottle significantly contributes to a more sustainable lifestyle. The modular design and reusable filter also minimize single-use plastic consumption.\n\n2. User-Friendly: The smart cap, easy-to-clean design, and collapsible structure make the bottle convenient and user-friendly. Users can customize their bottle to suit their needs, ensuring a better overall experience.\n\n3. Healthier Option: Using food-grade, BPA-free materials and an integrated filter ensures that the beverages consumed are free from harmful chemicals and impurities, promoting a healthier lifestyle.\n\n4. Cost-Effective: Although the initial investment might be higher, the long-term savings from reduced purchases of single-use plastic bottles and disposable filters make this reusable bottle a cost-effective choice.\n\n5. Encourages Hydration: The smart cap's features, such as hydration tracking and temperature monitoring, can motivate users to stay hydrated and develop healthier habits.\n\nBy redesigning a common household item like the plastic bottle, we can create a more sustainable, user-friendly, and health-conscious alternative that benefits both individuals and the environment.\n```\n\n**Evaluation Results:**\n\n\n Comparison with different models over various benchmarks1. Scores of AlpacaEval-2.0 and Arena-Hard are calculated with thinking=True\n \n | Models | \n Arena-Hard | \n AlpacaEval-2.0 | \n MMLU | \n PopQA | \n TruthfulQA | \n BigBenchHard2 | \n DROP3 | \n GSM8K | \n HumanEval | \n HumanEval+ | \n IFEval | \n AttaQ | \n
\n\n| Models | Arena-Hard | AlpacaEval-2.0 | MMLU | PopQA | TruthfulQA | BigBenchHard[2] | DROP[3] | GSM8K | HumanEval | HumanEval+ | IFEval | AttaQ |\n|---|---|---|---|---|---|---|---|---|---|---|---|---|\n| Granite-3.1-2B-Instruct | 23.3 | 27.17 | 57.11 | 20.55 | 59.79 | 61.82 | 20.99 | 67.55 | 79.45 | 75.26 | 63.59 | 84.7 |\n| Granite-3.2-2B-Instruct | 24.86 | 34.51 | 57.18 | 20.56 | 59.8 | 61.39 | 23.84 | 67.02 | 80.13 | 73.39 | 61.55 | 83.23 |\n| Granite-3.3-2B-Instruct | 28.86 | 43.45 | 55.88 | 18.4 | 58.97 | 63.91 | 44.33 | 72.48 | 80.51 | 75.68 | 65.8 | 87.47 |\n| Llama-3.1-8B-Instruct | 36.43 | 27.22 | 69.15 | 28.79 | 52.79 | 73.43 | 71.23 | 83.24 | 85.32 | 80.15 | 79.10 | 83.43 |\n| DeepSeek-R1-Distill-Llama-8B | 17.17 | 21.85 | 45.80 | 13.25 | 47.43 | 67.39 | 49.73 | 72.18 | 67.54 | 62.91 | 66.50 | 42.87 |\n| Qwen-2.5-7B-Instruct | 25.44 | 30.34 | 74.30 | 18.12 | 63.06 | 69.19 | 64.06 | 84.46 | 93.35 | 89.91 | 74.90 | 81.90 |\n| DeepSeek-R1-Distill-Qwen-7B | 10.36 | 15.35 | 50.72 | 9.94 | 47.14 | 67.38 | 51.78 | 78.47 | 79.89 | 78.43 | 59.10 | 42.45 |\n| Granite-3.1-8B-Instruct | 37.58 | 30.34 | 66.77 | 28.7 | 65.84 | 69.87 | 58.57 | 79.15 | 89.63 | 85.79 | 73.20 | 85.73 |\n| Granite-3.2-8B-Instruct | 55.25 | 61.19 | 66.79 | 28.04 | 66.92 | 71.86 | 58.29 | 81.65 | 89.35 | 85.72 | 74.31 | 84.7 |\n| Granite-3.3-8B-Instruct | 57.56 | 62.68 | 65.54 | 26.17 | 66.86 | 69.13 | 59.36 | 80.89 | 89.73 | 86.09 | 74.82 | 88.5 |\n
\n\nMath Benchmarks\n\n| Models | AIME24 | MATH-500 |\n|---|---|---|\n| Granite-3.1-2B-Instruct | 0.89 | 35.07 |\n| Granite-3.2-2B-Instruct | 0.89 | 35.54 |\n| Granite-3.3-2B-Instruct | 3.28 | 58.09 |\n| Granite-3.1-8B-Instruct | 1.97 | 48.73 |\n| Granite-3.2-8B-Instruct | 2.43 | 52.8 |\n| Granite-3.3-8B-Instruct | 8.12 | 69.02 |\n
\n \n**Training Data:** \nOverall, our training data largely comprises two key sources: (1) publicly available datasets with permissive licenses, and (2) internal synthetically generated data targeted to enhance reasoning capabilities. \n\n\n**Infrastructure:**\nWe train Granite-3.3-8B-Instruct using IBM's supercomputing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.\n\n**Ethical Considerations and Limitations:** \nGranite-3.3-8B-Instruct builds upon Granite-3.3-8B-Base, leveraging both permissively licensed open-source and select proprietary data for enhanced performance. Since it inherits its foundation from the previous model, all ethical considerations and limitations applicable to [Granite-3.3-8B-Base](https://huggingface.co/ibm-granite/granite-3.3-8b-base) remain relevant.\n\n\n**Resources**\n- ⭐\uFE0F Learn about the latest updates with Granite: https://www.ibm.com/granite\n- \uD83D\uDCC4 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/\n- \uD83D\uDCA1 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources\n\n[1] Evaluated using OLMES (except AttaQ and Arena-Hard scores)
\n[2] Added regex for more efficient answer extraction.
\n[3] Modified the implementation to handle some of the issues mentioned here
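\n\nFor local experimentation, the sketch below shows one way to query this GGUF once it is served through llama.cpp's OpenAI-compatible `llama-server`; the endpoint, port, and model name are illustrative assumptions rather than part of the upstream model card.\n\n```python\nfrom openai import OpenAI\n\n# Assumption: llama-server is already running locally with the Granite GGUF loaded,\n# e.g. `llama-server -m granite-3.3-8b-instruct-Q4_K_M.gguf --port 8080`.\nclient = OpenAI(base_url=\"http://localhost:8080/v1\", api_key=\"not-needed\")\n\nresponse = client.chat.completions.create(\n    model=\"granite-3.3-8b-instruct-Q4_K_M\",  # placeholder model name\n    messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": \"Summarize the strengths of Granite 3.3 in two sentences.\"},\n    ],\n    temperature=0.7,\n)\nprint(response.choices[0].message.content)\n```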
\n",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm-granite/granite-3.3-8b-instruct-GGUF/resolve/main/granite-3.3-8b-instruct-Q4_K_M.gguf",
"memory": 4939212390,
"properties": {
"jinja": "true"
},
"sha256": "77bcee066a76dcdd10d0d123c87e32c8ec2c74e31b6ffd87ebee49c9ac215dca",
"backend": "llama-cpp"
},
{
"id": "hf.ibm-research.granite-3.2-8b-instruct-GGUF",
"name": "ibm-research/granite-3.2-8b-instruct-GGUF",
"description": "# Granite-3.2-8B-Instruct-GGUF\n\n**Model Summary:**\nGranite-3.2-8B-Instruct is an 8-billion-parameter, long-context AI model fine-tuned for thinking capabilities. Built on top of [Granite-3.1-8B-Instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct), it has been trained using a mix of permissively licensed open-source datasets and internally generated synthetic data designed for reasoning tasks. The model allows controllability of its thinking capability, ensuring it is applied only when required.\n\n- **Developers:** Granite Team, IBM\n- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)\n- **Release Date**: February 26th, 2025\n- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n\n**Supported Languages:** \nEnglish, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. However, users may finetune this Granite model for languages beyond these 12 languages.\n\n**Intended Use:** \nThis model is designed to handle general instruction-following tasks and can be integrated into AI assistants across various domains, including business applications.\n\n**Capabilities**\n* **Thinking**\n* Summarization\n* Text classification\n* Text extraction\n* Question-answering\n* Retrieval Augmented Generation (RAG)\n* Code related tasks\n* Function-calling tasks\n* Multilingual dialog use cases\n* Long-context tasks including long document/meeting summarization, long document QA, etc.",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm-research/granite-3.2-8b-instruct-GGUF/resolve/main/granite-3.2-8b-instruct-Q4_K_M.gguf",
"memory": 4939212390,
"properties": {
"chatFormat": "openchat"
},
"sha256": "363f0bbc3200b9c9b0ab87efe237d77b1e05bb929d5d7e4b57c1447c911223e8",
"backend": "llama-cpp"
},
{
"id": "hf.ibm-granite.granite-8b-code-instruct",
"name": "ibm-granite/granite-8b-code-instruct-GGUF",
"description": "\n\n# ibm-granite/granite-8b-code-instruct-GGUF\nThis is the Q4_K_M converted version of the original [`ibm-granite/granite-8b-code-instruct`](https://huggingface.co/ibm-granite/granite-8b-code-instruct).\nRefer to the [original model card](https://huggingface.co/ibm-granite/granite-8b-code-instruct) for more details.\n\n## Use with llama.cpp\n```shell\ngit clone https://github.com/ggerganov/llama.cpp\ncd llama.cpp\n\n# install\nmake\n\n# run generation\n./main -m granite-8b-code-instruct-GGUF/granite-8b-code-instruct.Q4_K_M.gguf -n 128 -p \"def generate_random(x: int):\" --color\n```",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm-granite/granite-8b-code-instruct-GGUF/resolve/main/granite-8b-code-instruct.Q4_K_M.gguf",
"memory": 5347234284,
"properties": {
"chatFormat": "openchat"
},
"sha256": "bc8804cb43c4e1e82e2188658569b147587f83a89640600a64d5f7d7de2565b4",
"backend": "llama-cpp"
},
{
"id": "hf.ggerganov.whisper.cpp",
"name": "ggerganov/whisper.cpp",
"description": "# OpenAI's Whisper models converted to ggml format\n\n[Available models](https://huggingface.co/ggerganov/whisper.cpp/tree/main)\n",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
"memory": 487010000,
"sha256": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b",
"backend": "whisper-cpp"
},
{
"id": "hf.facebook.detr-resnet-101",
"name": "facebook/detr-resnet-101",
"description": "# DETR (End-to-End Object Detection) model with ResNet-101 backbone\n\nDEtection TRansformer (DETR) model trained end-to-end on COCO 2017 object detection (118k annotated images). It was introduced in the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Carion et al. and first released in [this repository](https://github.com/facebookresearch/detr). \n\nDisclaimer: The team releasing DETR did not write a model card for this model so this model card has been written by the Hugging Face team.\n\n## Model description\n\nThe DETR model is an encoder-decoder transformer with a convolutional backbone. Two heads are added on top of the decoder outputs in order to perform object detection: a linear layer for the class labels and a MLP (multi-layer perceptron) for the bounding boxes. The model uses so-called object queries to detect objects in an image. Each object query looks for a particular object in the image. For COCO, the number of object queries is set to 100. \n\nThe model is trained using a \"bipartite matching loss\": one compares the predicted classes + bounding boxes of each of the N = 100 object queries to the ground truth annotations, padded up to the same length N (so if an image only contains 4 objects, 96 annotations will just have a \"no object\" as class and \"no bounding box\" as bounding box). The Hungarian matching algorithm is used to create an optimal one-to-one mapping between each of the N queries and each of the N annotations. Next, standard cross-entropy (for the classes) and a linear combination of the L1 and generalized IoU loss (for the bounding boxes) are used to optimize the parameters of the model.\n\n\n\n## Intended uses & limitations\n\nYou can use the raw model for object detection. See the [model hub](https://huggingface.co/models?search=facebook/detr) to look for all available DETR models.",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/facebook/detr-resnet-101/resolve/no_timm/pytorch_model.bin",
"memory": 242980000,
"properties": {
"name": "facebook/detr-resnet-101"
},
"sha256": "893ae2442b36b2e8e1134ccbf8c0d9bd670648d0964509202ab30c9cbb3d2114",
"backend": "none"
},
{
"id": "hf.microsoft.Phi-4-mini-reasoning",
"name": "microsoft/Phi-4-mini-reasoning (Unsloth quantization)",
"description": "## Model Summary\n \nPhi-4-mini-reasoning is a lightweight open model built upon synthetic data with a focus on high-quality, reasoning dense data further finetuned for more advanced math reasoning capabilities. \nThe model belongs to the Phi-4 model family and supports 128K token context length. \n \n\uD83D\uDCF0 [Phi-4-mini-reasoning Blog](https://aka.ms/phi4-mini-reasoning/blog), and [Developer Article](https://techcommunity.microsoft.com/blog/azuredevcommunityblog/make-phi-4-mini-reasoning-more-powerful-with-industry-reasoning-on-edge-devices/4409764)
\n\uD83D\uDCD6 [Phi-4-mini-reasoning Technical Report](https://aka.ms/phi4-mini-reasoning/techreport) | [HF paper](https://huggingface.co/papers/2504.21233)
\n\uD83D\uDC69\u200D\uD83C\uDF73 [Phi Cookbook](https://github.com/microsoft/PhiCookBook)
\n\uD83C\uDFE1 [Phi Portal](https://azure.microsoft.com/en-us/products/phi)
\n\uD83D\uDDA5\uFE0F Try It [Azure](https://aka.ms/phi4-mini-reasoning/azure)
\n \n \n\uD83C\uDF89**Phi-4 models**: [[Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning)] | [[multimodal-instruct](https://huggingface.co/microsoft/Phi-4-multimodal-instruct) | [onnx](https://huggingface.co/microsoft/Phi-4-multimodal-instruct-onnx)]; \n[[mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [onnx](https://huggingface.co/microsoft/Phi-4-mini-instruct-onnx)]\n\n## Intended Uses\n \n### Primary Use Cases\n\nPhi-4-mini-reasoning is designed for multi-step, logic-intensive mathematical problem-solving tasks under memory/compute constrained environments and latency bound scenarios.\nSome of the use cases include formal proof generation, symbolic computation, advanced word problems, and a wide range of mathematical reasoning scenarios. \nThese models excel at maintaining context across steps, applying structured logic, and delivering accurate, reliable solutions in domains that require deep analytical thinking.\n\n### Use Case Considerations\n \nThis model is designed and tested for math reasoning only. It is not specifically designed or evaluated for all downstream purposes. \nDevelopers should consider common limitations of language models, as well as performance difference across languages, as they select use cases, and evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high-risk scenarios. \nDevelopers should be aware of and adhere to applicable laws or regulations (including but not limited to privacy, trade compliance laws, etc.) that are relevant to their use case. \n \n***Nothing contained in this Model Card should be interpreted as or deemed a restriction or modification to the license the model is released under.***\n \n## Release Notes\n \nThis release of Phi-4-mini-reasoning addresses user feedback and market demand for a compact reasoning model. \nIt is a compact transformer-based language model optimized for mathematical reasoning, built to deliver high-quality, step-by-step problem solving in environments where computing or latency is constrained.\nThe model is fine-tuned with synthetic math data from a more capable model (much larger, smarter, more accurate, and better at following instructions), which has resulted in enhanced reasoning performance. \nPhi-4-mini-reasoning balances reasoning ability with efficiency, making it potentially suitable for educational applications, embedded tutoring, and lightweight deployment on edge or mobile systems.\nIf a critical issue is identified with Phi-4-mini-reasoning, it should be promptly reported through the MSRC Researcher Portal or secure@microsoft.com \n \n### Model Quality\n \nTo understand the capabilities, the 3.8B parameters Phi-4-mini-reasoning model was compared with a set of models over a variety of reasoning benchmarks. 
\nA high-level overview of the model quality is as follows:\n\n| Model | AIME | MATH-500 | GPQA Diamond |\n|------------------------------------|-------|----------|--------------|\n| o1-mini* | 63.6 | 90.0 | 60.0 |\n| DeepSeek-R1-Distill-Qwen-7B | 53.3 | 91.4 | 49.5 |\n| DeepSeek-R1-Distill-Llama-8B | 43.3 | 86.9 | 47.3 |\n| Bespoke-Stratos-7B* | 20.0 | 82.0 | 37.8 |\n| OpenThinker-7B* | 31.3 | 83.0 | 42.4 |\n| Llama-3.2-3B-Instruct | 6.7 | 44.4 | 25.3 |\n| Phi-4-Mini (base model, 3.8B) | 10.0 | 71.8 | 36.9 |\n|**Phi-4-mini-reasoning (3.8B)** | **57.5** | **94.6** | **52.0** |\n \nOverall, the model with only 3.8B-param achieves a similar level of multilingual language understanding and reasoning ability as much larger models.\nHowever, it is still fundamentally limited by its size for certain tasks. The model simply does not have the capacity to store too much factual knowledge, therefore, users may experience factual incorrectness. However, it may be possible to resolve such weakness by augmenting Phi-4 with a search engine, particularly when using the model under RAG settings.\n \n## Usage\n \n### Tokenizer\n \nPhi-4-mini-reasoning supports a vocabulary size of up to `200064` tokens. The [tokenizer files](https://huggingface.co/microsoft/Phi-4-mini-reasoning/blob/main/added_tokens.json) already provide placeholder tokens that can be used for downstream fine-tuning, but they can also be extended up to the model's vocabulary size.\n \n### Input Formats\n \nGiven the nature of the training data, the Phi-4-mini-instruct\nmodel is best suited for prompts using specific formats.\nBelow are the two primary formats:\n \n#### Chat format\n \nThis format is used for general conversation and instructions:\n \n```yaml\n<|system|>Your name is Phi, an AI math expert developed by Microsoft.<|end|><|user|>How to solve 3*x^2+4*x+5=1?<|end|><|assistant|>\n```\n### Inference with transformers\n\nPhi-4-mini-reasoning has been integrated in the `4.51.3` version of `transformers`. The current `transformers` version can be verified with: `pip list | grep transformers`.\nPython 3.8 and 3.10 will work best. \nList of required packages:\n\n```\nflash_attn==2.7.4.post1\ntorch==2.5.1\ntransformers==4.51.3\naccelerate==1.3.0\n```\n \nPhi-4-mini-reasoning is also available in [Azure AI Studio](https://aka.ms/phi-4-mini-reasoning/azure)\n\n#### Example\n \nAfter obtaining the Phi-4-mini-instruct model checkpoints, users can use this sample code for inference.\n \n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\ntorch.random.manual_seed(0)\n\nmodel_id = \"microsoft/Phi-4-mini-reasoning\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"cuda\",\n torch_dtype=\"auto\",\n trust_remote_code=True,\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\nmessages = [{\n \"role\": \"user\",\n \"content\": \"How to solve 3*x^2+4*x+5=1?\"\n}] \ninputs = tokenizer.apply_chat_template(\n messages,\n add_generation_prompt=True,\n return_dict=True,\n return_tensors=\"pt\",\n)\n\noutputs = model.generate(\n **inputs.to(model.device),\n max_new_tokens=32768,\n temperature=0.8,\n top_p=0.95,\n do_sample=True,\n)\noutputs = tokenizer.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])\n\nprint(outputs[0])\n```\n \n## Training\n \n### Model\n \n+ **Architecture:** Phi-4-mini-reasoning shares the same architecture as Phi-4-Mini, which has 3.8B parameters and is a dense decoder-only Transformer model. 
When compared with Phi-3.5-Mini, the major changes with Phi-4-Mini are 200K vocabulary, grouped-query attention, and shared input and output embedding.
\n+ **Inputs:** Text. It is best suited for prompts using the chat format.
\n+ **Context length:** 128K tokens
\n+ **GPUs:** 128 H100-80G
\n+ **Training time:** 2 days
\n+ **Training data:** 150B tokens
\n+ **Outputs:** Generated text
\n+ **Dates:** Trained in February 2025
\n+ **Status:** This is a static model trained on offline datasets with the cutoff date of February 2025 for publicly available data.
\n+ **Supported languages:** English
\n+ **Release date:** April 2025
\n \n### Training Datasets\n \nThe training data for Phi-4-mini-reasoning consists exclusively of synthetic mathematical content generated by a stronger and more advanced reasoning model, Deepseek-R1. \nThe objective is to distill knowledge from this model. This synthetic dataset comprises over one million diverse math problems spanning multiple levels of difficulty (from middle school to Ph.D. level).\nFor each problem in the synthetic dataset, eight distinct solutions (rollouts) were sampled, and only those verified as correct were retained, resulting in approximately 30 billion tokens of math content.\nThe dataset integrates three primary components: \n1) a curated selection of high-quality, publicly available math questions and a part of the SFT(Supervised Fine-Tuning) data that was used to train the base Phi-4-Mini model;\n2) an extensive collection of synthetic math data generated by the Deepseek-R1 model, designed specifically for high-quality supervised fine-tuning and model distillation; and\n3) a balanced set of correct and incorrect answers used to construct preference data aimed at enhancing Phi-4-mini-reasoning's reasoning capabilities by learning more effective reasoning trajectories\n\n## Software\n* [PyTorch](https://github.com/pytorch/pytorch)\n* [Transformers](https://github.com/huggingface/transformers)\n* [Flash-Attention](https://github.com/HazyResearch/flash-attention)\n \n## Hardware\nNote that by default, the Phi-4-mini-reasoning model uses flash attention, which requires certain types of GPU hardware to run. We have tested on the following GPU types:\n* NVIDIA A100\n* NVIDIA H100\n \nIf you want to run the model on:\n* NVIDIA V100 or earlier generation GPUs: call AutoModelForCausalLM.from_pretrained() with attn_implementation=\"eager\"\n\n## Safety Evaluation and Red-Teaming\n \nThe Phi-4 family of models has adopted a robust safety post-training approach. This approach leverages a variety of both open-source and in-house generated datasets. The overall technique employed to do the safety alignment is a combination of SFT, DPO (Direct Preference Optimization), and RLHF (Reinforcement Learning from Human Feedback) approaches by utilizing human-labeled and synthetic English-language datasets, including publicly available datasets focusing on helpfulness and harmlessness, as well as various questions and answers targeted to multiple safety categories. \n\nPhi-4-Mini-Reasoning was developed in accordance with Microsoft's responsible AI principles. Potential safety risks in the model’s responses were assessed using the Azure AI Foundry’s Risk and Safety Evaluation framework, focusing on harmful content, direct jailbreak, and model groundedness. The Phi-4-Mini-Reasoning Model Card contains additional information about our approach to safety and responsible AI considerations that developers should be aware of when using this model.\n\n## Responsible AI Considerations\n \nLike other language models, the Phi family of models can potentially behave in ways that are unfair, unreliable, or offensive. Some of the limiting behaviors to be aware of include:\n \n+ Quality of Service: The Phi models are trained primarily on English text and some additional multilingual text. Languages other than English will experience worse performance as well as performance disparities across non-English. English language varieties with less representation in the training data might experience worse performance than standard American English. 
\n+ Multilingual performance and safety gaps: We believe it is important to make language models more widely available across different languages, but the Phi 4 models still exhibit challenges common across multilingual releases. As with any deployment of LLMs, developers will be better positioned to test for performance or safety gaps for their linguistic and cultural context and customize the model with additional fine-tuning and appropriate safeguards.\n+ Representation of Harms & Perpetuation of Stereotypes: These models can over- or under-represent groups of people, erase representation of some groups, or reinforce demeaning or negative stereotypes. Despite safety post-training, these limitations may still be present due to differing levels of representation of different groups, cultural contexts, or prevalence of examples of negative stereotypes in training data that reflect real-world patterns and societal biases.\n+ Inappropriate or Offensive Content: These models may produce other types of inappropriate or offensive content, which may make it inappropriate to deploy for sensitive contexts without additional mitigations that are specific to the case.\n+ Information Reliability: Language models can generate nonsensical content or fabricate content that might sound reasonable but is inaccurate or outdated. \n+\tElection Information Reliability : The model has an elevated defect rate when responding to election-critical queries, which may result in incorrect or unauthoritative election critical information being presented. We are working to improve the model's performance in this area. Users should verify information related to elections with the election authority in their region.\n+ Limited Scope for Code: The majority of Phi 4 training data is based in Python and uses common packages such as \"typing, math, random, collections, datetime, itertools\". If the model generates Python scripts that utilize other packages or scripts in other languages, it is strongly recommended that users manually verify all API uses.\n+ Long Conversation: Phi 4 models, like other models, can in some cases generate responses that are repetitive, unhelpful, or inconsistent in very long chat sessions in both English and non-English languages. Developers are encouraged to place appropriate mitigations, like limiting conversation turns to account for the possible conversational drift.\n \nDevelopers should apply responsible AI best practices, including mapping, measuring, and mitigating risks associated with their specific use case and cultural, linguistic context. Phi 4 family of models are general purpose models. As developers plan to deploy these models for specific use cases, they are encouraged to fine-tune the models for their use case and leverage the models as part of broader AI systems with language-specific safeguards in place. Important areas for consideration include: \n \n+ Allocation: Models may not be suitable for scenarios that could have consequential impact on legal status or the allocation of resources or life opportunities (ex: housing, employment, credit, etc.) without further assessments and additional debiasing techniques.\n+ High-Risk Scenarios: Developers should assess the suitability of using models in high-risk scenarios where unfair, unreliable or offensive outputs might be extremely costly or lead to harm. This includes providing advice in sensitive or expert domains where accuracy and reliability are critical (ex: legal or health advice). 
Additional safeguards should be implemented at the application level according to the deployment context.\n+ Misinformation: Models may produce inaccurate information. Developers should follow transparency best practices and inform end-users they are interacting with an AI system. At the application level, developers can build feedback mechanisms and pipelines to ground responses in use-case specific, contextual information, a technique known as Retrieval Augmented Generation (RAG). \n+ Generation of Harmful Content: Developers should assess outputs for their context and use available safety classifiers or custom solutions appropriate for their use case.\n+ Misuse: Other forms of misuse such as fraud, spam, or malware production may be possible, and developers should ensure that their applications do not violate applicable laws and regulations.\n \n## License\nThe model is licensed under the [MIT license](./LICENSE).\n \n## Trademarks\nThis project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies.\n \n \n## Appendix A: Benchmark Methodology\n \nWe include a brief word on methodology here - and in particular, how we think about optimizing prompts. In an ideal world, we would never change any prompts in our benchmarks to ensure it is always an apples-to-apples comparison when comparing different models. Indeed, this is our default approach, and is the case in the vast majority of models we have run to date. For all benchmarks, we consider using the same generation configuration such as max sequence length (32768), the same temperature for the fair comparison.\nBenchmark datasets\nWe evaluate the model with three of the most popular math benchmarks where the strongest reasoning models are competing together. Specifically:\n-\tMath-500: This benchmark consists of 500 challenging math problems designed to test the model's ability to perform complex mathematical reasoning and problem-solving.\n-\tAIME 2024: The American Invitational Mathematics Examination (AIME) is a highly regarded math competition that features a series of difficult problems aimed at assessing advanced mathematical skills and logical reasoning.\n-\tGPQA Diamond: The Graduate-Level Google-Proof Q&A (GPQA) Diamond benchmark focuses on evaluating the model's ability to understand and solve a wide range of mathematical questions, including both straightforward calculations and more intricate problem-solving tasks.",
"registry": "Hugging Face",
"license": "MIT",
"url": "https://huggingface.co/unsloth/Phi-4-mini-reasoning-GGUF/resolve/main/Phi-4-mini-reasoning-Q4_K_M.gguf",
"properties": {
"jinja": "true"
},
"memory": 2480343613,
"sha256": "81878401a2f8160473649af89560a7fc0932f3623e4f6e58143d5dcbf71d6480",
"backend": "llama-cpp"
},
{
"id": "hf.microsoft.Phi-4-reasoning-plus",
"name": "microsoft/Phi-4-reasoning-plus (Unsloth quantization)",
"description": "## Model Summary\n\n| | | \n|-------------------------|-------------------------------------------------------------------------------|\n| **Developers** | Microsoft Research |\n| **Description** | Phi-4-reasoning-plus is a state-of-the-art open-weight reasoning model finetuned from Phi-4 using supervised fine-tuning on a dataset of chain-of-thought traces and reinforcement learning. The supervised fine-tuning dataset includes a blend of synthetic prompts and high-quality filtered data from public domain websites, focused on math, science, and coding skills as well as alignment data for safety and Responsible AI. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning. Phi-4-reasoning-plus has been trained additionally with Reinforcement Learning, hence, it has higher accuracy but generates on average 50% more tokens, thus having higher latency. |\n| **Architecture** | Base model same as previously released Phi-4, 14B parameters, dense decoder-only Transformer model |\n| **Inputs** | Text, best suited for prompts in the chat format |\n| **Context length** | 32k tokens |\n| **GPUs** | 32 H100-80G |\n| **Training time** | 2.5 days |\n| **Training data** | 16B tokens, ~8.3B unique tokens |\n| **Outputs** | Generated text in response to the input. Model responses have two sections, namely, a reasoning chain-of-thought block followed by a summarization block |\n| **Dates** | January 2025 – April 2025 |\n| **Status** | Static model trained on an offline dataset with cutoff dates of March 2025 and earlier for publicly available data |\n| **Release date** | April 30, 2025 |\n| **License** | MIT |\n\n## Intended Use\n\n| | |\n|-------------------------------|-------------------------------------------------------------------------|\n| **Primary Use Cases** | Our model is designed to accelerate research on language models, for use as a building block for generative AI powered features. It provides uses for general purpose AI systems and applications (primarily in English) which require:
1. Memory/compute constrained environments.
2. Latency bound scenarios.
3. Reasoning and logic. |\n| **Out-of-Scope Use Cases** | This model is designed and tested for math reasoning only. Our models are not specifically designed or evaluated for all downstream purposes. Developers should consider common limitations of language models as they select use cases, and evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high-risk scenarios. Developers should be aware of and adhere to applicable laws or regulations (including privacy, trade compliance laws, etc.) that are relevant to their use case, including the model’s focus on English. Review the Responsible AI Considerations section below for further guidance when choosing a use case. Nothing contained in this Model Card should be interpreted as or deemed a restriction or modification to the license the model is released under. |\n\n## Usage\n\n> \n > To fully take advantage of the model's capabilities, inference must use `temperature=0.8`, `top_k=50`, `top_p=0.95`, and `do_sample=True`. For more complex queries, set `max_new_tokens=32768` to allow for longer chain-of-thought (CoT).\n *Phi-4-reasoning-plus has shown strong performance on reasoning-intensive tasks. In our experiments, we extended its maximum number of tokens to 64k, and it handled longer sequences with promising results, maintaining coherence and logical consistency over extended inputs. This makes it a compelling option to explore for tasks that require deep, multi-step reasoning or extensive context.*\n### Input Formats\nGiven the nature of the training data, **always use** ChatML template with the **following system prompt** for inference:\n```bash\n<|im_start|>system<|im_sep|>\nYou are Phi, a language model trained by Microsoft to help users. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: {Thought section} {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:<|im_end|>\n<|im_start|>user<|im_sep|>\nWhat is the derivative of x^2?<|im_end|>\n<|im_start|>assistant<|im_sep|>\n```\n### With `transformers`\n```python\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\ntokenizer = AutoTokenizer.from_pretrained(\\\"microsoft/Phi-4-reasoning-plus\\\")\nmodel = AutoModelForCausalLM.from_pretrained(\\\"microsoft/Phi-4-reasoning-plus\\\", device_map=\\\"auto\\\", torch_dtype=\\\"auto\\\")\nmessages = [\n {\\\"role\\\": \\\"system\\\", \\\"content\\\": \\\"You are Phi, a language model trained by Microsoft to help users. 
Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: {Thought section} {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:\\\"},\n {\\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"What is the derivative of x^2?\\\"},\n]\ninputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors=\\\"pt\\\")\n\noutputs = model.generate(\n inputs.to(model.device),\n max_new_tokens=4096,\n temperature=0.8,\n top_k=50,\n top_p=0.95,\n do_sample=True,\n)\nprint(tokenizer.decode(outputs[0]))\n```\n### With `vllm`\n\n```bash\nvllm serve microsoft/Phi-4-reasoning-plus --enable-reasoning --reasoning-parser deepseek_r1\n```\n\n*Phi-4-reasoning-plus is also supported out-of-the-box by Ollama, llama.cpp, and any Phi-4 compatible framework.*\n\n## Data Overview\n\n### Training Datasets\n\nOur training data is a mixture of Q&A, chat format data in math, science, and coding. The chat prompts are sourced from filtered high-quality web data and optionally rewritten and processed through a synthetic data generation pipeline. We further include data to improve truthfulness and safety.\n\n### Benchmark Datasets\n\nWe evaluated Phi-4-reasoning-plus using the open-source [Eureka](https://github.com/microsoft/eureka-ml-insights) evaluation suite and our own internal benchmarks to understand the model's capabilities. More specifically, we evaluate our model on:\n\nReasoning tasks:\n\n* **AIME 2025, 2024, 2023, and 2022:** Math olympiad questions.\n\n* **GPQA-Diamond:** Complex, graduate-level science questions.\n\n* **OmniMath:** Collection of over 4000 olympiad-level math problems with human annotation.\n\n* **LiveCodeBench:** Code generation benchmark gathered from competitive coding contests.\n\n* **3SAT (3-literal Satisfiability Problem) and TSP (Traveling Salesman Problem):** Algorithmic problem solving.\n\n* **BA Calendar:** Planning.\n\n* **Maze and SpatialMap:** Spatial understanding.\n\nGeneral-purpose benchmarks:\n\n* **Kitab:** Information retrieval.\n\n* **IFEval and ArenaHard:** Instruction following.\n\n* **PhiBench:** Internal benchmark.\n\n* **FlenQA:** Impact of prompt length on model performance.\n\n* **HumanEvalPlus:** Functional code generation.\n\n* **MMLU-Pro:** Popular aggregated dataset for multitask language understanding.\n\n## Safety\n\n### Approach\n\nPhi-4-reasoning-plus has adopted a robust safety post-training approach via supervised fine-tuning (SFT). 
This approach leverages a variety of both open-source and in-house generated synthetic prompts, with LLM-generated responses that adhere to rigorous Microsoft safety guidelines, e.g., User Understanding and Clarity, Security and Ethical Guidelines, Limitations, Disclaimers and Knowledge Scope, Handling Complex and Sensitive Topics, Safety and Respectful Engagement, Confidentiality of Guidelines and Confidentiality of Chain-of-Thoughts. \n\n### Safety Evaluation and Red-Teaming\n\nPrior to release, Phi-4-reasoning-plus followed a multi-faceted evaluation approach. Quantitative evaluation was conducted with multiple open-source safety benchmarks and in-house tools utilizing adversarial conversation simulation. For qualitative safety evaluation, we collaborated with the independent AI Red Team (AIRT) at Microsoft to assess safety risks posed by Phi-4-reasoning-plus in both average and adversarial user scenarios. In the average user scenario, AIRT emulated typical single-turn and multi-turn interactions to identify potentially risky behaviors. The adversarial user scenario tested a wide range of techniques aimed at intentionally subverting the model's safety training including grounded-ness, jailbreaks, harmful content like hate and unfairness, violence, sexual content, or self-harm, and copyright violations for protected material. We further evaluate models on Toxigen, a benchmark designed to measure bias and toxicity targeted towards minority groups. \n\nPlease refer to the technical report for more details on safety alignment. \n\n## Model Quality\n\nAt the high-level overview of the model quality on representative benchmarks. For the tables below, higher numbers indicate better performance:\n\n| | AIME 24 | AIME 25 | OmniMath | GPQA-D | LiveCodeBench (8/1/24–2/1/25) |\n|-----------------------------|-------------|-------------|-------------|------------|-------------------------------|\n| Phi-4-reasoning | 75.3 | 62.9 | 76.6 | 65.8 | 53.8 |\n| Phi-4-reasoning-plus | 81.3 | 78.0 | 81.9 | 68.9 | 53.1 |\n| OpenThinker2-32B | 58.0 | 58.0 | — | 64.1 | — |\n| QwQ 32B | 79.5 | 65.8 | — | 59.5 | 63.4 |\n| EXAONE-Deep-32B | 72.1 | 65.8 | — | 66.1 | 59.5 |\n| DeepSeek-R1-Distill-70B | 69.3 | 51.5 | 63.4 | 66.2 | 57.5 |\n| DeepSeek-R1 | 78.7 | 70.4 | 85.0 | 73.0 | 62.8 |\n| o1-mini | 63.6 | 54.8 | — | 60.0 | 53.8 |\n| o1 | 74.6 | 75.3 | 67.5 | 76.7 | 71.0 |\n| o3-mini | 88.0 | 78.0 | 74.6 | 77.7 | 69.5 |\n| Claude-3.7-Sonnet | 55.3 | 58.7 | 54.6 | 76.8 | — |\n| Gemini-2.5-Pro | 92.0 | 86.7 | 61.1 | 84.0 | 69.2 |\n\n| | Phi-4 | Phi-4-reasoning | Phi-4-reasoning-plus | o3-mini | GPT-4o |\n|----------------------------------------|-------|------------------|-------------------|---------|--------|\n| FlenQA [3K-token subset] | 82.0 | 97.7 | 97.9 | 96.8 | 90.8 |\n| IFEval Strict | 62.3 | 83.4 | 84.9 | 91.5 | 81.8 |\n| ArenaHard | 68.1 | 73.3 | 79.0 | 81.9 | 75.6 |\n| HumanEvalPlus | 83.5 | 92.9 | 92.3 | 94.0| 88.0 |\n| MMLUPro | 71.5 | 74.3 | 76.0 | 79.4 | 73.0 |\n| Kitab
No Context - Precision | 19.3 | 23.2 | 27.6 | 37.9 | 53.7 |\n| Kitab With Context - Precision | 88.5 | 91.5 | 93.6 | 94.0 | 84.7 |\n| Kitab No Context - Recall | 8.2 | 4.9 | 6.3 | 4.2 | 20.3 |\n| Kitab With Context - Recall | 68.1 | 74.8 | 75.4 | 76.1 | 69.2 |\n| Toxigen Discriminative (Toxic category) | 72.6 | 86.7 | 77.3 | 85.4 | 87.6 |\n| Toxigen Discriminative (Neutral category) | 90.0 | 84.7 | 90.5 | 88.7 | 
85.1 |\n| PhiBench 2.21 | 58.2 | 70.6 | 74.2 | 78.0| 72.4 |\n\nOverall, Phi-4-reasoning and Phi-4-reasoning-plus, with only 14B parameters, performs well across a wide range of reasoning tasks, outperforming significantly larger open-weight models such as DeepSeek-R1 distilled 70B model and approaching the performance levels of full DeepSeek R1 model. We also test the models on multiple new reasoning benchmarks for algorithmic problem solving and planning, including 3SAT, TSP, and BA-Calendar. These new tasks are nominally out-of-domain for the models as the training process did not intentionally target these skills, but the models still show strong generalization to these tasks. Furthermore, when evaluating performance against standard general abilities benchmarks such as instruction following or non-reasoning tasks, we find that our new models improve significantly from Phi-4, despite the post-training being focused on reasoning skills in specific domains. \n\n## Responsible AI Considerations\n\nLike other language models, Phi-4-reasoning-plus can potentially behave in ways that are unfair, unreliable, or offensive. Some of the limiting behaviors to be aware of include: \n\n* **Quality of Service:** The model is trained primarily on English text. Languages other than English will experience worse performance. English language varieties with less representation in the training data might experience worse performance than standard American English. Phi-4-reasoning-plus is not intended to support multilingual use. \n\n* **Representation of Harms & Perpetuation of Stereotypes:** These models can over- or under-represent groups of people, erase representation of some groups, or reinforce demeaning or negative stereotypes. Despite safety post-training, these limitations may still be present due to differing levels of representation of different groups or prevalence of examples of negative stereotypes in training data that reflect real-world patterns and societal biases. \n\n* **Inappropriate or Offensive Content:** These models may produce other types of inappropriate or offensive content, which may make it inappropriate to deploy for sensitive contexts without additional mitigations that are specific to the use case. \n\n* **Information Reliability:** Language models can generate nonsensical content or fabricate content that might sound reasonable but is inaccurate or outdated.\n\n* **Election Information Reliability:** The model has an elevated defect rate when responding to election-critical queries, which may result in incorrect or unauthoritative election critical information being presented. We are working to improve the model's performance in this area. Users should verify information related to elections with the election authority in their region. \n\n* **Limited Scope for Code:** Majority of Phi-4-reasoning-plus training data is based in Python and uses common packages such as `typing`, `math`, `random`, `collections`, `datetime`, `itertools`. If the model generates Python scripts that utilize other packages or scripts in other languages, we strongly recommend users manually verify all API uses. \n\nDevelopers should apply responsible AI best practices and are responsible for ensuring that a specific use case complies with relevant laws and regulations (e.g. privacy, trade, etc.). Using safety services like [Azure AI Content Safety](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety) that have advanced guardrails is highly recommended. 
Important areas for consideration include:\n\n* **Allocation:** Models may not be suitable for scenarios that could have consequential impact on legal status or the allocation of resources or life opportunities (ex: housing, employment, credit, etc.) without further assessments and additional debiasing techniques. \n\n* **High-Risk Scenarios:** Developers should assess suitability of using models in high-risk scenarios where unfair, unreliable or offensive outputs might be extremely costly or lead to harm. This includes providing advice in sensitive or expert domains where accuracy and reliability are critical (ex: legal or health advice). Additional safeguards should be implemented at the application level according to the deployment context. \n\n* **Misinformation:** Models may produce inaccurate information. Developers should follow transparency best practices and inform end-users they are interacting with an AI system. At the application level, developers can build feedback mechanisms and pipelines to ground responses in use-case specific, contextual information, a technique known as Retrieval Augmented Generation (RAG). \n\n* **Generation of Harmful Content:** Developers should assess outputs for their context and use available safety classifiers or custom solutions appropriate for their use case. \n\n* **Misuse:** Other forms of misuse such as fraud, spam, or malware production may be possible, and developers should ensure that their applications do not violate applicable laws and regulations.\n",
"registry": "Hugging Face",
"license": "mit",
"url": "https://huggingface.co/unsloth/Phi-4-reasoning-plus-GGUF/resolve/main/Phi-4-reasoning-plus-Q4_K_M.gguf",
"memory": 9715463520,
"properties": {
"jinja": "true"
},
"sha256": "faf720745e20df40f52ee218be14c72b33070f7aacc508b3fbc61d47f32b4ffe",
"backend": "llama-cpp"
},
{
"id": "hf.google.gemma-3n-E4B",
"name": "google/gemma-3n-E4B (Unsloth quantization)",
"description": "# Gemma 3n model card\n\n**Model Page**: [Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n)\n\n**Resources and Technical Documentation**:\n\n- [Responsible Generative AI Toolkit](https://ai.google.dev/responsible)\n- [Gemma on Kaggle](https://www.kaggle.com/models/google/gemma-3n)\n- [Gemma on HuggingFace](https://huggingface.co/collections/google/gemma-3n-685065323f5984ef315c93f4)\n- [Gemma on Vertex Model Garden](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemma3n)\n\n**Terms of Use**: [Terms](https://ai.google.dev/gemma/terms)\\\n**Authors**: Google DeepMind\n\n## Model Information\n\nSummary description and brief definition of inputs and outputs.\n\n### Description\n\nGemma is a family of lightweight, state-of-the-art open models from Google,\nbuilt from the same research and technology used to create the Gemini models.\nGemma 3n models are designed for efficient execution on low-resource devices.\nThey are capable of multimodal input, handling text, image, video, and audio\ninput, and generating text outputs, with open weights for pre-trained and\ninstruction-tuned variants. These models were trained with data in over 140\nspoken languages.\n\nGemma 3n models use selective parameter activation technology to reduce resource\nrequirements. This technique allows the models to operate at an effective size\nof 2B and 4B parameters, which is lower than the total number of parameters they\ncontain. For more information on Gemma 3n's efficient parameter management\ntechnology, see the\n[Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n#parameters)\npage.\n\n### Inputs and outputs\n\n- **Input:**\n - Text string, such as a question, a prompt, or a document to be\n summarized\n - Images, normalized to 256x256, 512x512, or 768x768 resolution\n and encoded to 256 tokens each\n - Audio data encoded to 6.25 tokens per second from a single channel\n - Total input context of 32K tokens\n- **Output:**\n - Generated text in response to the input, such as an answer to a\n question, analysis of image content, or a summary of a document\n - Total output length up to 32K tokens, subtracting the request\n input tokens\n\n### Usage\n\nBelow, there are some code snippets on how to get quickly started with running\nthe model. First, install the Transformers library. Gemma 3n is supported\nstarting from transformers 4.53.0.\n\n```sh\n$ pip install -U transformers\n```\n\nThen, copy the snippet from the section that is relevant for your use case.\n\n#### Running with the `pipeline` API\n\nYou can initialize the model and processor for inference with `pipeline` as\nfollows.\n\n```python\nfrom transformers import pipeline\nimport torch\n\npipe = pipeline(\n \"image-text-to-text\",\n model=\"google/gemma-3n-e4b-it\",\n device=\"cuda\",\n torch_dtype=torch.bfloat16,\n)\n```\n\nWith instruction-tuned models, you need to use chat templates to process our\ninputs first. 
Then, you can pass it to the pipeline.\n\n```python\nmessages = [\n {\n \"role\": \"system\",\n \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"url\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG\"},\n {\"type\": \"text\", \"text\": \"What animal is on the candy?\"}\n ]\n }\n]\n\noutput = pipe(text=messages, max_new_tokens=200)\nprint(output[0][\"generated_text\"][-1][\"content\"])\n# Okay, let's take a look!\n# Based on the image, the animal on the candy is a **turtle**.\n# You can see the shell shape and the head and legs.\n```\n\n#### Running the model on a single GPU\n\n```python\nfrom transformers import AutoProcessor, Gemma3nForConditionalGeneration\nfrom PIL import Image\nimport requests\nimport torch\n\nmodel_id = \"google/gemma-3n-e4b-it\"\n\nmodel = Gemma3nForConditionalGeneration.from_pretrained(model_id, device_map=\"auto\", torch_dtype=torch.bfloat16,).eval()\n\nprocessor = AutoProcessor.from_pretrained(model_id)\n\nmessages = [\n {\n \"role\": \"system\",\n \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"image\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg\"},\n {\"type\": \"text\", \"text\": \"Describe this image in detail.\"}\n ]\n }\n]\n\ninputs = processor.apply_chat_template(\n messages,\n add_generation_prompt=True,\n tokenize=True,\n return_dict=True,\n return_tensors=\"pt\",\n).to(model.device)\n\ninput_len = inputs[\"input_ids\"].shape[-1]\n\nwith torch.inference_mode():\n generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)\n generation = generation[0][input_len:]\n\ndecoded = processor.decode(generation, skip_special_tokens=True)\nprint(decoded)\n\n# **Overall Impression:** The image is a close-up shot of a vibrant garden scene,\n# focusing on a cluster of pink cosmos flowers and a busy bumblebee.\n# It has a slightly soft, natural feel, likely captured in daylight.\n```\n\n### Citation\n\n```\n@article{gemma_3n_2025,\n title={Gemma 3n},\n url={https://ai.google.dev/gemma/docs/gemma-3n},\n publisher={Google DeepMind},\n author={Gemma Team},\n year={2025}\n}\n```\n\n## Model Data\n\nData used for model training and how the data was processed.\n\n### Training Dataset\n\nThese models were trained on a dataset that includes a wide variety of sources\ntotalling approximately 11 trillion tokens. The knowledge cutoff date for the\ntraining data was June 2024. 
Here are the key components:\n\n- **Web Documents**: A diverse collection of web text ensures the model\n is exposed to a broad range of linguistic styles, topics, and vocabulary.\n The training dataset includes content in over 140 languages.\n- **Code**: Exposing the model to code helps it to learn the syntax and\n patterns of programming languages, which improves its ability to generate\n code and understand code-related questions.\n- **Mathematics**: Training on mathematical text helps the model learn\n logical reasoning, symbolic representation, and to address mathematical queries.\n- **Images**: A wide range of images enables the model to perform image\n analysis and visual data extraction tasks.\n- Audio: A diverse set of sound samples enables the model to recognize\n speech, transcribe text from recordings, and identify information in audio data.\n\nThe combination of these diverse data sources is crucial for training a\npowerful multimodal model that can handle a wide variety of different tasks and\ndata formats.\n\n### Data Preprocessing\n\nHere are the key data cleaning and filtering methods applied to the training\ndata:\n\n- **CSAM Filtering**: Rigorous CSAM (Child Sexual Abuse Material)\n filtering was applied at multiple stages in the data preparation process to\n ensure the exclusion of harmful and illegal content.\n- **Sensitive Data Filtering**: As part of making Gemma pre-trained models\n safe and reliable, automated techniques were used to filter out certain\n personal information and other sensitive data from training sets.\n- **Additional methods**: Filtering based on content quality and safety in\n line with\n [our policies](https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf).\n\n## Implementation Information\n\nDetails about the model internals.\n\n### Hardware\n\nGemma was trained using [Tensor Processing Unit\n(TPU)](https://cloud.google.com/tpu/docs/intro-to-tpu) hardware (TPUv4p, TPUv5p\nand TPUv5e). Training generative models requires significant computational\npower. TPUs, designed specifically for matrix operations common in machine\nlearning, offer several advantages in this domain:\n\n- **Performance**: TPUs are specifically designed to handle the massive\n computations involved in training generative models. They can speed up\n training considerably compared to CPUs.\n- **Memory**: TPUs often come with large amounts of high-bandwidth memory,\n allowing for the handling of large models and batch sizes during training.\n This can lead to better model quality.\n- **Scalability**: TPU Pods (large clusters of TPUs) provide a scalable\n solution for handling the growing complexity of large foundation models.\n You can distribute training across multiple TPU devices for faster and more\n efficient processing.\n- **Cost-effectiveness**: In many scenarios, TPUs can provide a more\n cost-effective solution for training large models compared to CPU-based\n infrastructure, especially when considering the time and resources saved\n due to faster training.\n\nThese advantages are aligned with\n[Google's commitments to operate sustainably](https://sustainability.google/operating-sustainably/).\n\n### Software\n\nTraining was done using [JAX](https://github.com/jax-ml/jax) and\n[ML Pathways](https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/).\nJAX allows researchers to take advantage of the latest generation of hardware,\nincluding TPUs, for faster and more efficient training of large models. 
ML\nPathways is Google's latest effort to build artificially intelligent systems\ncapable of generalizing across multiple tasks. This is specially suitable for\nfoundation models, including large language models like these ones.\n\nTogether, JAX and ML Pathways are used as described in the\n[paper about the Gemini family of models](https://goo.gle/gemma2report):\n*\"the 'single controller' programming model of Jax and Pathways allows a single\nPython process to orchestrate the entire training run, dramatically simplifying\nthe development workflow.\"*\n\n## Evaluation\n\nModel evaluation metrics and results.\n\n### Benchmark Results\n\nThese models were evaluated at full precision (float32) against a large\ncollection of different datasets and metrics to cover different aspects of\ncontent generation. Evaluation results marked with **IT** are for\ninstruction-tuned models. Evaluation results marked with **PT** are for\npre-trained models.\n\n#### Reasoning and factuality\n\n| Benchmark | Metric | n-shot | E2B PT | E4B PT |\n| ------------------------------ |----------------|----------|:--------:|:--------:|\n| [HellaSwag][hellaswag] | Accuracy | 10-shot | 72.2 | 78.6 |\n| [BoolQ][boolq] | Accuracy | 0-shot | 76.4 | 81.6 |\n| [PIQA][piqa] | Accuracy | 0-shot | 78.9 | 81.0 |\n| [SocialIQA][socialiqa] | Accuracy | 0-shot | 48.8 | 50.0 |\n| [TriviaQA][triviaqa] | Accuracy | 5-shot | 60.8 | 70.2 |\n| [Natural Questions][naturalq] | Accuracy | 5-shot | 15.5 | 20.9 |\n| [ARC-c][arc] | Accuracy | 25-shot | 51.7 | 61.6 |\n| [ARC-e][arc] | Accuracy | 0-shot | 75.8 | 81.6 |\n| [WinoGrande][winogrande] | Accuracy | 5-shot | 66.8 | 71.7 |\n| [BIG-Bench Hard][bbh] | Accuracy | few-shot | 44.3 | 52.9 |\n| [DROP][drop] | Token F1 score | 1-shot | 53.9 | 60.8 |\n\n[hellaswag]: https://arxiv.org/abs/1905.07830\n[boolq]: https://arxiv.org/abs/1905.10044\n[piqa]: https://arxiv.org/abs/1911.11641\n[socialiqa]: https://arxiv.org/abs/1904.09728\n[triviaqa]: https://arxiv.org/abs/1705.03551\n[naturalq]: https://github.com/google-research-datasets/natural-questions\n[arc]: https://arxiv.org/abs/1911.01547\n[winogrande]: https://arxiv.org/abs/1907.10641\n[bbh]: https://paperswithcode.com/dataset/bbh\n[drop]: https://arxiv.org/abs/1903.00161\n\n#### Multilingual\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------|-------------------------|----------|:--------:|:--------:|\n| [MGSM][mgsm] | Accuracy | 0-shot | 53.1 | 60.7 |\n| [WMT24++][wmt24pp] (ChrF) | Character-level F-score | 0-shot | 42.7 | 50.1 |\n| [Include][include] | Accuracy | 0-shot | 38.6 | 57.2 |\n| [MMLU][mmlu] (ProX) | Accuracy | 0-shot | 8.1 | 19.9 |\n| [OpenAI MMLU][openai-mmlu] | Accuracy | 0-shot | 22.3 | 35.6 |\n| [Global-MMLU][global-mmlu] | Accuracy | 0-shot | 55.1 | 60.3 |\n| [ECLeKTic][eclektic] | ECLeKTic score | 0-shot | 2.5 | 1.9 |\n\n[mgsm]: https://arxiv.org/abs/2210.03057\n[wmt24pp]: https://arxiv.org/abs/2502.12404v1\n[include]:https://arxiv.org/abs/2411.19799\n[mmlu]: https://arxiv.org/abs/2009.03300\n[openai-mmlu]: https://huggingface.co/datasets/openai/MMMLU\n[global-mmlu]: https://huggingface.co/datasets/CohereLabs/Global-MMLU\n[eclektic]: https://arxiv.org/abs/2502.21228\n\n#### STEM and code\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------|--------------------------|----------|:--------:|:--------:|\n| [GPQA][gpqa] Diamond | RelaxedAccuracy/accuracy | 0-shot | 24.8 | 23.7 |\n| [LiveCodeBench][lcb] v5 | pass@1 | 0-shot | 18.6 | 25.7 |\n| Codegolf 
v2.2 | pass@1 | 0-shot | 11.0 | 16.8 |\n| [AIME 2025][aime-2025] | Accuracy | 0-shot | 6.7 | 11.6 |\n\n[gpqa]: https://arxiv.org/abs/2311.12022\n[lcb]: https://arxiv.org/abs/2403.07974\n[aime-2025]: https://www.vals.ai/benchmarks/aime-2025-05-09\n\n#### Additional benchmarks\n\n| Benchmark | Metric | n-shot | E2B IT | E4B IT |\n| ------------------------------------ |------------|----------|:--------:|:--------:|\n| [MMLU][mmlu] | Accuracy | 0-shot | 60.1 | 64.9 |\n| [MBPP][mbpp] | pass@1 | 3-shot | 56.6 | 63.6 |\n| [HumanEval][humaneval] | pass@1 | 0-shot | 66.5 | 75.0 |\n| [LiveCodeBench][lcb] | pass@1 | 0-shot | 13.2 | 13.2 |\n| HiddenMath | Accuracy | 0-shot | 27.7 | 37.7 |\n| [Global-MMLU-Lite][global-mmlu-lite] | Accuracy | 0-shot | 59.0 | 64.5 |\n| [MMLU][mmlu] (Pro) | Accuracy | 0-shot | 40.5 | 50.6 |\n\n[gpqa]: https://arxiv.org/abs/2311.12022\n[mbpp]: https://arxiv.org/abs/2108.07732\n[humaneval]: https://arxiv.org/abs/2107.03374\n[lcb]: https://arxiv.org/abs/2403.07974\n[global-mmlu-lite]: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite\n\n## Ethics and Safety\n\nEthics and safety evaluation approach and results.\n\n### Evaluation Approach\n\nOur evaluation methods include structured evaluations and internal red-teaming\ntesting of relevant content policies. Red-teaming was conducted by a number of\ndifferent teams, each with different goals and human evaluation metrics. These\nmodels were evaluated against a number of different categories relevant to\nethics and safety, including:\n\n- **Child Safety**: Evaluation of text-to-text and image to text prompts\n covering child safety policies, including child sexual abuse and\n exploitation.\n- **Content Safety:** Evaluation of text-to-text and image to text prompts\n covering safety policies including, harassment, violence and gore, and hate\n speech.\n- **Representational Harms**: Evaluation of text-to-text and image to text\n prompts covering safety policies including bias, stereotyping, and harmful\n associations or inaccuracies.\n\nIn addition to development level evaluations, we conduct \"assurance\nevaluations\" which are our 'arms-length' internal evaluations for responsibility\ngovernance decision making. They are conducted separately from the model\ndevelopment team, to inform decision making about release. High level findings\nare fed back to the model team, but prompt sets are held-out to prevent\noverfitting and preserve the results' ability to inform decision making. Notable\nassurance evaluation results are reported to our Responsibility & Safety Council\nas part of release review.\n\n### Evaluation Results\n\nFor all areas of safety testing, we saw safe levels of performance across the\ncategories of child safety, content safety, and representational harms relative\nto previous Gemma models. All testing was conducted without safety filters to\nevaluate the model capabilities and behaviors. For text-to-text, image-to-text,\nand audio-to-text, and across all model sizes, the model produced minimal policy\nviolations, and showed significant improvements over previous Gemma models'\nperformance with respect to high severity violations. A limitation of our\nevaluations was they included primarily English language prompts.\n\n## Usage and Limitations\n\nThese models have certain limitations that users should be aware of.\n\n### Intended Usage\n\nOpen generative models have a wide range of applications across various\nindustries and domains. The following list of potential uses is not\ncomprehensive. 
The purpose of this list is to provide contextual information\nabout the possible use-cases that the model creators considered as part of model\ntraining and development.\n\n- Content Creation and Communication\n - **Text Generation**: Generate creative text formats such as\n poems, scripts, code, marketing copy, and email drafts.\n - **Chatbots and Conversational AI**: Power conversational\n interfaces for customer service, virtual assistants, or interactive\n applications.\n - **Text Summarization**: Generate concise summaries of a text\n corpus, research papers, or reports.\n - **Image Data Extraction**: Extract, interpret, and summarize\n visual data for text communications.\n - **Audio Data Extraction**: Transcribe spoken language, translate speech\n to text in other languages, and analyze sound-based data.\n- Research and Education\n - **Natural Language Processing (NLP) and generative model\n Research**: These models can serve as a foundation for researchers to\n experiment with generative models and NLP techniques, develop\n algorithms, and contribute to the advancement of the field.\n - **Language Learning Tools**: Support interactive language\n learning experiences, aiding in grammar correction or providing writing\n practice.\n - **Knowledge Exploration**: Assist researchers in exploring large\n bodies of data by generating summaries or answering questions about\n specific topics.\n\n### Limitations\n\n- Training Data\n - The quality and diversity of the training data significantly\n influence the model's capabilities. Biases or gaps in the training data\n can lead to limitations in the model's responses.\n - The scope of the training dataset determines the subject areas\n the model can handle effectively.\n- Context and Task Complexity\n - Models are better at tasks that can be framed with clear\n prompts and instructions. Open-ended or highly complex tasks might be\n challenging.\n - A model's performance can be influenced by the amount of context\n provided (longer context generally leads to better outputs, up to a\n certain point).\n- Language Ambiguity and Nuance\n - Natural language is inherently complex. Models might struggle\n to grasp subtle nuances, sarcasm, or figurative language.\n- Factual Accuracy\n - Models generate responses based on information they learned\n from their training datasets, but they are not knowledge bases. They\n may generate incorrect or outdated factual statements.\n- Common Sense\n - Models rely on statistical patterns in language. They might\n lack the ability to apply common sense reasoning in certain situations.\n\n### Ethical Considerations and Risks\n\nThe development of generative models raises several ethical concerns. 
In\ncreating an open model, we have carefully considered the following:\n\n- Bias and Fairness\n - Generative models trained on large-scale, real-world text and image data\n can reflect socio-cultural biases embedded in the training material.\n These models underwent careful scrutiny, input data pre-processing\n described and posterior evaluations reported in this card.\n- Misinformation and Misuse\n - Generative models can be misused to generate text that is\n false, misleading, or harmful.\n - Guidelines are provided for responsible use with the model, see the\n [Responsible Generative AI Toolkit](https://ai.google.dev/responsible).\n- Transparency and Accountability:\n - This model card summarizes details on the models' architecture,\n capabilities, limitations, and evaluation processes.\n - A responsibly developed open model offers the opportunity to\n share innovation by making generative model technology accessible to\n developers and researchers across the AI ecosystem.\n\nRisks identified and mitigations:\n\n- **Perpetuation of biases**: It's encouraged to perform continuous monitoring\n (using evaluation metrics, human review) and the exploration of de-biasing\n techniques during model training, fine-tuning, and other use cases.\n- **Generation of harmful content**: Mechanisms and guidelines for content\n safety are essential. Developers are encouraged to exercise caution and\n implement appropriate content safety safeguards based on their specific\n product policies and application use cases.\n- **Misuse for malicious purposes**: Technical limitations and developer\n and end-user education can help mitigate against malicious applications of\n generative models. Educational resources and reporting mechanisms for users\n to flag misuse are provided. Prohibited uses of Gemma models are outlined\n in the\n [Gemma Prohibited Use Policy](https://ai.google.dev/gemma/prohibited_use_policy).\n- **Privacy violations**: Models were trained on data filtered for removal of\n certain personal information and other sensitive data. Developers are\n encouraged to adhere to privacy regulations with privacy-preserving\n techniques.\n\n### Benefits\n\nAt the time of release, this family of models provides high-performance open\ngenerative model implementations designed from the ground up for responsible AI\ndevelopment compared to similarly sized models.\n\nUsing the benchmark evaluation metrics described in this document, these models\nhave shown to provide superior performance to other, comparably-sized open model\nalternatives.",
"registry": "Hugging Face",
"license": "gemma",
"url": "https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF/resolve/main/gemma-3n-E4B-it-Q4_K_M.gguf",
"memory": 4425974,
"properties": {
"jinja": "true"
},
"sha256": "43b489bb77a81bda85180e7c490d40ad7f1d5c2ce654c9b05e15e104bd3c777e",
"backend": "llama-cpp"
},
{
"id": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
"name": "OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
"description": "# Mistral-7B-Instruct-v0.2-int4-ov\n* Model creator: [Mistral AI](https://huggingface.co/mistralai)\n * Original model: [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)\n\n## Description\n\nThis is [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) model converted to the [OpenVINO™ IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) (Intermediate Representation) format.\n\n## Compatibility\n\nThe provided OpenVINO™ IR model is compatible with:\n\n* OpenVINO version 2024.2.0 and higher\n* Optimum Intel 1.19.0 and higher\n\n## Running Model Inference with [Optimum Intel](https://huggingface.co/docs/optimum/intel/index)\n\n\n1. Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:\n\n```\npip install optimum[openvino]\n```\n\n2. Run model inference:\n\n```\nfrom transformers import AutoTokenizer\nfrom optimum.intel.openvino import OVModelForCausalLM\n\nmodel_id = \"OpenVINO/\"\ntokenizer = AutoTokenizer.from_pretrained(model_id)\nmodel = OVModelForCausalLM.from_pretrained(model_id)\n\ninputs = tokenizer(\"What is OpenVINO?\", return_tensors=\"pt\")\n\noutputs = model.generate(**inputs, max_length=200)\ntext = tokenizer.batch_decode(outputs)[0]\nprint(text)\n```\n\nFor more examples and possible optimizations, refer to the [OpenVINO Large Language Model Inference Guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n\n## Running Model Inference with [OpenVINO GenAI](https://github.com/openvinotoolkit/openvino.genai)\n\n1. Install packages required for using OpenVINO GenAI.\n```\npip install openvino-genai huggingface_hub\n```\n\n2. Download model from HuggingFace Hub\n \n```\nimport huggingface_hub as hf_hub\n\nmodel_id = \"OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov\"\nmodel_path = \"Mistral-7B-Instruct-v0.2-int4-ov\"\n\nhf_hub.snapshot_download(model_id, local_dir=model_path)\n\n```\n\n3. Run model inference:\n\n```\nimport openvino_genai as ov_genai\n\ndevice = \"CPU\"\npipe = ov_genai.LLMPipeline(model_path, device)\nprint(pipe.generate(\"What is OpenVINO?\", max_length=200))\n```\n\nMore GenAI usage examples can be found in OpenVINO GenAI library [docs](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md) and [samples](https://github.com/openvinotoolkit/openvino.genai?tab=readme-ov-file#openvino-genai-samples)\n\n## Limitations\n\nCheck the original model card for [limitations](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2#limitations).\n\n## Legal information\n\nThe original model is distributed under [apache-2.0](https://choosealicense.com/licenses/apache-2.0/) license. More details can be found in [original model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).\n\n## Disclaimer\n\nIntel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.",
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "huggingface:/OpenVINO/mistral-7B-instruct-v0.2-int4-ov",
"backend": "openvino"
}
],
"categories": [
{
"id": "natural-language-processing",
"name": "Natural Language Processing",
"description": "Models that work with text: classify, summarize, translate, or generate text."
},
{
"id": "computer-vision",
"description": "Process images, from classification to object detection and segmentation.",
"name": "Computer Vision"
},
{
"id": "audio",
"description": "Recognize speech or classify audio with audio models.",
"name": "Audio"
},
{
"id": "multimodal",
"description": "Stuff about multimodal models goes here omg yes amazing.",
"name": "Multimodal"
}
]
}
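The model entries above share a common shape. As a reading aid, here is a minimal TypeScript sketch of that shape, inferred only from the fields visible in this catalog; the interface names are illustrative and not the extension's actual types:

```typescript
// Sketch of the catalog entry shape inferred from the entries above.
// Interface names are illustrative; the extension's real types may differ.
interface CatalogModelEntry {
  id: string;                          // unique identifier, often the Hugging Face repo path
  name: string;                        // display name
  description: string;                 // Markdown model card embedded as a JSON string
  registry: string;                    // e.g. "Hugging Face"
  license: string;                     // e.g. "Apache-2.0" or "gemma"
  url: string;                         // download URL or huggingface: reference
  memory?: number;                     // approximate memory requirement (absent for some entries)
  sha256?: string;                     // checksum of the downloaded file (absent for some entries)
  backend: string;                     // e.g. 'llama-cpp' or 'openvino' in the entries above
  properties?: Record<string, string>; // extra backend hints, e.g. { "jinja": "true" }
}

interface CatalogCategory {
  id: string;
  name: string;
  description: string;
}
```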
================================================
FILE: packages/backend/src/assets/inference-images.json
================================================
{
"whispercpp": {
"default": "quay.io/ramalama/ramalama-whisper-server@sha256:2ce4e2751672e3baf76d6f220100160da86ff5a98001b76392aeae9da2d90b18"
},
"llamacpp": {
"default": "quay.io/ramalama/ramalama-llama-server@sha256:293f66f2dfea8e21393dc03e898616b2a71f0a72a0f3bc5f936439130ada2648",
"cuda": "quay.io/ramalama/cuda-llama-server@sha256:b9ced640539c72edee2f946b69618a6d30b68700ac9342d1b9483831988d40ef",
"intel": "quay.io/ramalama/intel-gpu-llama-server@sha256:ea2aa37c0a4af544de80da9d8aa53a0641c91ccfdca3a329a251685a96210551"
},
"openvino": {
"default": "quay.io/ramalama/openvino@sha256:e026ecbdf6ae222a193badad5b0dd2253362e366e22c8b402f5a492803b10fd5"
}
}
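This mapping associates each inference backend with a default container image, plus GPU-specific builds for llama.cpp. As a rough illustration of how such a lookup could be resolved, here is a hedged sketch; the function name and vendor parameter are hypothetical, and the extension's actual selection logic in its inference utilities may differ:

```typescript
// Hypothetical helper showing how the mapping above could be queried.
// Requires "resolveJsonModule" in tsconfig; names are illustrative only.
import images from './inference-images.json';

type LlamaCppVariant = 'default' | 'cuda' | 'intel';

function pickLlamaCppImage(gpuVendor?: 'nvidia' | 'intel'): string {
  // Prefer a GPU-specific build when one is listed, otherwise fall back to the default image.
  const variant: LlamaCppVariant =
    gpuVendor === 'nvidia' ? 'cuda' : gpuVendor === 'intel' ? 'intel' : 'default';
  return images.llamacpp[variant];
}

// pickLlamaCppImage('nvidia') resolves to the quay.io/ramalama/cuda-llama-server image listed above.
```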
================================================
FILE: packages/backend/src/assets/instructlab-images.json
================================================
{
"default": "docker.io/redhat/instructlab@sha256:c6b2ecb4547b1f43b5539ee99bdbf5c9ae40599fabe1c740622295d9721b91c4"
}
================================================
FILE: packages/backend/src/assets/llama-stack-images.json
================================================
{
"default": "ghcr.io/containers/podman-ai-lab-stack:a06f399ebf7cb2645af126da0e84395db9bb0d1a"
}
================================================
FILE: packages/backend/src/assets/llama-stack-playground-images.json
================================================
{
"default": "quay.io/podman-ai-lab/llama-stack-playground@sha256:2ee73137c0b2b401c2703b5881dd84c07f0baa385408e7c02f076a2804c689c2"
}
================================================
FILE: packages/backend/src/assets/openai.json
================================================
{
"openapi": "3.1.0",
"info": {
"title": "OpenAI API",
"version": "0.3.2"
},
"servers": [
{
"url": "",
"description": "description"
}
],
"paths": {
"/v1/completions": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Completion",
"operationId": "create_completion_v1_completions_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateCompletionRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"anyOf": [
{
"$ref": "#/components/schemas/CreateCompletionResponse"
},
{
"type": "string"
},
{
"$ref": "#/components/schemas/CreateCompletionResponse"
}
],
"title": "Completion response, when stream=False"
}
},
"text/event-stream": {
"schema": {
"type": "string",
"title": "Server Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",
"example": "data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/v1/embeddings": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Embedding",
"operationId": "create_embedding_v1_embeddings_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateEmbeddingRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/v1/chat/completions": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Chat",
"operationId": "create_chat_completion_v1_chat_completions_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateChatCompletionRequest"
},
"examples": {
"normal": {
"summary": "Chat Completion",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
]
}
},
"json_mode": {
"summary": "JSON Mode",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Who won the world series in 2020"
}
],
"response_format": {
"type": "json_object"
}
}
},
"tool_calling": {
"summary": "Tool Calling",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Extract Jason is 30 years old."
}
],
"tools": [
{
"type": "function",
"function": {
"name": "User",
"description": "User record",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
},
"required": ["name", "age"]
}
}
}
],
"tool_choice": {
"type": "function",
"function": {
"name": "User"
}
}
}
},
"logprobs": {
"summary": "Logprobs",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"logprobs": true,
"top_logprobs": 10
}
}
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"anyOf": [
{
"$ref": "#/components/schemas/CreateChatCompletionResponse"
},
{
"type": "string"
},
{
"$ref": "#/components/schemas/CreateChatCompletionResponse"
}
],
"title": "Completion response, when stream=False"
}
},
"text/event-stream": {
"schema": {
"type": "string",
"title": "Server Side Streaming response, when stream=TrueSee SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",
"example": "data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/v1/models": {
"get": {
"tags": ["OpenAI V1"],
"summary": "Models",
"operationId": "get_models_v1_models_get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ModelList"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/extras/tokenize": {
"post": {
"tags": ["Extras"],
"summary": "Tokenize",
"operationId": "tokenize_extras_tokenize_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/extras/tokenize/count": {
"post": {
"tags": ["Extras"],
"summary": "Tokenize Count",
"operationId": "count_query_tokens_extras_tokenize_count_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputCountResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
},
"/extras/detokenize": {
"post": {
"tags": ["Extras"],
"summary": "Detokenize",
"operationId": "detokenize_extras_detokenize_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DetokenizeInputRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DetokenizeInputResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"security": [
{
"HTTPBearer": []
}
]
}
}
},
"components": {
"schemas": {
"ChatCompletionFunction": {
"properties": {
"name": {
"type": "string",
"title": "Name"
},
"description": {
"type": "string",
"title": "Description"
},
"parameters": {
"additionalProperties": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
},
{
"type": "boolean"
},
{
"items": {},
"type": "array"
},
{
"type": "object"
},
{
"type": "null"
}
]
},
"type": "object",
"title": "Parameters"
}
},
"type": "object",
"required": ["name", "parameters"],
"title": "ChatCompletionFunction"
},
"ChatCompletionMessageToolCall": {
"properties": {
"id": {
"type": "string",
"title": "Id"
},
"type": {
"type": "string",
"const": "function",
"title": "Type"
},
"function": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCallFunction"
}
},
"type": "object",
"required": ["id", "type", "function"],
"title": "ChatCompletionMessageToolCall"
},
"ChatCompletionMessageToolCallFunction": {
"properties": {
"name": {
"type": "string",
"title": "Name"
},
"arguments": {
"type": "string",
"title": "Arguments"
}
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionMessageToolCallFunction"
},
"ChatCompletionNamedToolChoice": {
"properties": {
"type": {
"type": "string",
"const": "function",
"title": "Type"
},
"function": {
"$ref": "#/components/schemas/ChatCompletionNamedToolChoiceFunction"
}
},
"type": "object",
"required": ["type", "function"],
"title": "ChatCompletionNamedToolChoice"
},
"ChatCompletionNamedToolChoiceFunction": {
"properties": {
"name": {
"type": "string",
"title": "Name"
}
},
"type": "object",
"required": ["name"],
"title": "ChatCompletionNamedToolChoiceFunction"
},
"ChatCompletionRequestAssistantMessage": {
"properties": {
"role": {
"type": "string",
"const": "assistant",
"title": "Role"
},
"content": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Content"
},
"tool_calls": {
"items": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCall"
},
"type": "array",
"title": "Tool Calls"
},
"function_call": {
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageFunctionCall"
}
},
"type": "object",
"required": ["role", "content"],
"title": "ChatCompletionRequestAssistantMessage"
},
"ChatCompletionRequestAssistantMessageFunctionCall": {
"properties": {
"name": {
"type": "string",
"title": "Name"
},
"arguments": {
"type": "string",
"title": "Arguments"
}
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionRequestAssistantMessageFunctionCall"
},
"ChatCompletionRequestFunctionCallOption": {
"properties": {
"name": {
"type": "string",
"title": "Name"
}
},
"type": "object",
"required": ["name"],
"title": "ChatCompletionRequestFunctionCallOption"
},
"ChatCompletionRequestFunctionMessage": {
"properties": {
"role": {
"type": "string",
"const": "function",
"title": "Role"
},
"content": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Content"
},
"name": {
"type": "string",
"title": "Name"
}
},
"type": "object",
"required": ["role", "content", "name"],
"title": "ChatCompletionRequestFunctionMessage"
},
"ChatCompletionRequestMessageContentPartImage": {
"properties": {
"type": {
"type": "string",
"const": "image_url",
"title": "Type"
},
"image_url": {
"anyOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImageImageUrl"
}
],
"title": "Image Url"
}
},
"type": "object",
"required": ["type", "image_url"],
"title": "ChatCompletionRequestMessageContentPartImage"
},
"ChatCompletionRequestMessageContentPartImageImageUrl": {
"properties": {
"url": {
"type": "string",
"title": "Url"
},
"detail": {
"type": "string",
"enum": ["auto", "low", "high"],
"title": "Detail"
}
},
"type": "object",
"required": ["url"],
"title": "ChatCompletionRequestMessageContentPartImageImageUrl"
},
"ChatCompletionRequestMessageContentPartText": {
"properties": {
"type": {
"type": "string",
"const": "text",
"title": "Type"
},
"text": {
"type": "string",
"title": "Text"
}
},
"type": "object",
"required": ["type", "text"],
"title": "ChatCompletionRequestMessageContentPartText"
},
"ChatCompletionRequestResponseFormat": {
"properties": {
"type": {
"type": "string",
"enum": ["text", "json_object"],
"title": "Type"
},
"schema": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
},
{
"type": "boolean"
},
{
"items": {},
"type": "array"
},
{
"type": "object"
},
{
"type": "null"
}
],
"title": "Schema"
}
},
"type": "object",
"required": ["type"],
"title": "ChatCompletionRequestResponseFormat"
},
"ChatCompletionRequestSystemMessage": {
"properties": {
"role": {
"type": "string",
"const": "system",
"title": "Role"
},
"content": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Content"
}
},
"type": "object",
"required": ["role", "content"],
"title": "ChatCompletionRequestSystemMessage"
},
"ChatCompletionRequestToolMessage": {
"properties": {
"role": {
"type": "string",
"const": "tool",
"title": "Role"
},
"content": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Content"
},
"tool_call_id": {
"type": "string",
"title": "Tool Call Id"
}
},
"type": "object",
"required": ["role", "content", "tool_call_id"],
"title": "ChatCompletionRequestToolMessage"
},
"ChatCompletionRequestUserMessage": {
"properties": {
"role": {
"type": "string",
"const": "user",
"title": "Role"
},
"content": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImage"
}
]
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Content"
}
},
"type": "object",
"required": ["role", "content"],
"title": "ChatCompletionRequestUserMessage"
},
"ChatCompletionResponseChoice": {
"properties": {
"index": {
"type": "integer",
"title": "Index"
},
"message": {
"$ref": "#/components/schemas/ChatCompletionResponseMessage"
},
"logprobs": {
"anyOf": [
{
"$ref": "#/components/schemas/CompletionLogprobs"
},
{
"type": "null"
}
]
},
"finish_reason": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Finish Reason"
}
},
"type": "object",
"required": ["index", "message", "logprobs", "finish_reason"],
"title": "ChatCompletionResponseChoice"
},
"ChatCompletionResponseFunctionCall": {
"properties": {
"name": {
"type": "string",
"title": "Name"
},
"arguments": {
"type": "string",
"title": "Arguments"
}
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionResponseFunctionCall"
},
"ChatCompletionResponseMessage": {
"properties": {
"content": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Content"
},
"tool_calls": {
"items": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCall"
},
"type": "array",
"title": "Tool Calls"
},
"role": {
"type": "string",
"enum": ["assistant", "function"],
"title": "Role"
},
"function_call": {
"$ref": "#/components/schemas/ChatCompletionResponseFunctionCall"
}
},
"type": "object",
"required": ["content", "role"],
"title": "ChatCompletionResponseMessage"
},
"ChatCompletionTool": {
"properties": {
"type": {
"type": "string",
"const": "function",
"title": "Type"
},
"function": {
"$ref": "#/components/schemas/ChatCompletionToolFunction"
}
},
"type": "object",
"required": ["type", "function"],
"title": "ChatCompletionTool"
},
"ChatCompletionToolFunction": {
"properties": {
"name": {
"type": "string",
"title": "Name"
},
"description": {
"type": "string",
"title": "Description"
},
"parameters": {
"additionalProperties": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
},
{
"type": "boolean"
},
{
"items": {},
"type": "array"
},
{
"type": "object"
},
{
"type": "null"
}
]
},
"type": "object",
"title": "Parameters"
}
},
"type": "object",
"required": ["name", "parameters"],
"title": "ChatCompletionToolFunction"
},
"CompletionChoice": {
"properties": {
"text": {
"type": "string",
"title": "Text"
},
"index": {
"type": "integer",
"title": "Index"
},
"logprobs": {
"anyOf": [
{
"$ref": "#/components/schemas/CompletionLogprobs"
},
{
"type": "null"
}
]
},
"finish_reason": {
"anyOf": [
{
"type": "string",
"enum": ["stop", "length"]
},
{
"type": "null"
}
],
"title": "Finish Reason"
}
},
"type": "object",
"required": ["text", "index", "logprobs", "finish_reason"],
"title": "CompletionChoice"
},
"CompletionLogprobs": {
"properties": {
"text_offset": {
"items": {
"type": "integer"
},
"type": "array",
"title": "Text Offset"
},
"token_logprobs": {
"items": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
]
},
"type": "array",
"title": "Token Logprobs"
},
"tokens": {
"items": {
"type": "string"
},
"type": "array",
"title": "Tokens"
},
"top_logprobs": {
"items": {
"anyOf": [
{
"additionalProperties": {
"type": "number"
},
"type": "object"
},
{
"type": "null"
}
]
},
"type": "array",
"title": "Top Logprobs"
}
},
"type": "object",
"required": ["text_offset", "token_logprobs", "tokens", "top_logprobs"],
"title": "CompletionLogprobs"
},
"CompletionUsage": {
"properties": {
"prompt_tokens": {
"type": "integer",
"title": "Prompt Tokens"
},
"completion_tokens": {
"type": "integer",
"title": "Completion Tokens"
},
"total_tokens": {
"type": "integer",
"title": "Total Tokens"
}
},
"type": "object",
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
"title": "CompletionUsage"
},
"CreateChatCompletionRequest": {
"properties": {
"messages": {
"items": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestSystemMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestUserMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestToolMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestFunctionMessage"
}
]
},
"type": "array",
"title": "Messages",
"description": "A list of messages to generate completions for.",
"default": []
},
"functions": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ChatCompletionFunction"
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Functions",
"description": "A list of functions to apply to the generated completions."
},
"function_call": {
"anyOf": [
{
"type": "string",
"enum": ["none", "auto"]
},
{
"$ref": "#/components/schemas/ChatCompletionRequestFunctionCallOption"
},
{
"type": "null"
}
],
"title": "Function Call",
"description": "A function to apply to the generated completions."
},
"tools": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ChatCompletionTool"
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Tools",
"description": "A list of tools to apply to the generated completions."
},
"tool_choice": {
"anyOf": [
{
"type": "string",
"enum": ["none", "auto", "required"]
},
{
"$ref": "#/components/schemas/ChatCompletionNamedToolChoice"
},
{
"type": "null"
}
],
"title": "Tool Choice",
"description": "A tool to apply to the generated completions."
},
"max_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Max Tokens",
"description": "The maximum number of tokens to generate. Defaults to inf"
},
"min_tokens": {
"type": "integer",
"minimum": 0.0,
"title": "Min Tokens",
"description": "The minimum number of tokens to generate. It may return fewer tokens if another condition is met (e.g. max_tokens, stop).",
"default": 0
},
"logprobs": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"title": "Logprobs",
"description": "Whether to output the logprobs or not. Default is True",
"default": false
},
"top_logprobs": {
"anyOf": [
{
"type": "integer",
"minimum": 0.0
},
{
"type": "null"
}
],
"title": "Top Logprobs",
"description": "The number of logprobs to generate. If None, no logprobs are generated. logprobs need to set to True."
},
"temperature": {
"type": "number",
"title": "Temperature",
"description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
"default": 0.8
},
"top_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Top P",
"description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.",
"default": 0.95
},
"min_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Min P",
"description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out.",
"default": 0.05
},
"stop": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Stop",
"description": "A list of tokens at which to stop generation. If None, no stop tokens are used."
},
"stream": {
"type": "boolean",
"title": "Stream",
"description": "Whether to stream the results as they are generated. Useful for chatbots.",
"default": false
},
"stream_options": {
"anyOf": [
{
"$ref": "#/components/schemas/StreamOptions"
},
{
"type": "null"
}
],
"description": "Options for streaming response. Only set this when you set stream: true."
},
"presence_penalty": {
"anyOf": [
{
"type": "number",
"maximum": 2.0,
"minimum": -2.0
},
{
"type": "null"
}
],
"title": "Presence Penalty",
"description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
"default": 0.0
},
"frequency_penalty": {
"anyOf": [
{
"type": "number",
"maximum": 2.0,
"minimum": -2.0
},
{
"type": "null"
}
],
"title": "Frequency Penalty",
"description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
"default": 0.0
},
"logit_bias": {
"anyOf": [
{
"additionalProperties": {
"type": "number"
},
"type": "object"
},
{
"type": "null"
}
],
"title": "Logit Bias"
},
"seed": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Seed"
},
"response_format": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestResponseFormat"
},
{
"type": "null"
}
]
},
"model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model",
"description": "The model to use for generating completions."
},
"n": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "N",
"default": 1
},
"user": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "User"
},
"top_k": {
"type": "integer",
"minimum": 0.0,
"title": "Top K",
"description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.",
"default": 40
},
"repeat_penalty": {
"type": "number",
"minimum": 0.0,
"title": "Repeat Penalty",
"description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.",
"default": 1.1
},
"logit_bias_type": {
"anyOf": [
{
"type": "string",
"enum": ["input_ids", "tokens"]
},
{
"type": "null"
}
],
"title": "Logit Bias Type"
},
"mirostat_mode": {
"type": "integer",
"maximum": 2.0,
"minimum": 0.0,
"title": "Mirostat Mode",
"description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)",
"default": 0
},
"mirostat_tau": {
"type": "number",
"maximum": 10.0,
"minimum": 0.0,
"title": "Mirostat Tau",
"description": "Mirostat target entropy, i.e. the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text",
"default": 5.0
},
"mirostat_eta": {
"type": "number",
"maximum": 1.0,
"minimum": 0.001,
"title": "Mirostat Eta",
"description": "Mirostat learning rate",
"default": 0.1
},
"grammar": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Grammar"
}
},
"type": "object",
"title": "CreateChatCompletionRequest",
"examples": [
{
"messages": [
{
"content": "You are a helpful assistant.",
"role": "system"
},
{
"content": "What is the capital of France?",
"role": "user"
}
]
}
]
},
"CreateChatCompletionResponse": {
"properties": {
"id": {
"type": "string",
"title": "Id"
},
"object": {
"type": "string",
"const": "chat.completion",
"title": "Object"
},
"created": {
"type": "integer",
"title": "Created"
},
"model": {
"type": "string",
"title": "Model"
},
"choices": {
"items": {
"$ref": "#/components/schemas/ChatCompletionResponseChoice"
},
"type": "array",
"title": "Choices"
},
"usage": {
"$ref": "#/components/schemas/CompletionUsage"
}
},
"type": "object",
"required": ["id", "object", "created", "model", "choices", "usage"],
"title": "CreateChatCompletionResponse"
},
"CreateCompletionRequest": {
"properties": {
"prompt": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
}
],
"title": "Prompt",
"description": "The prompt to generate completions for.",
"default": ""
},
"suffix": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Suffix",
"description": "A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots."
},
"max_tokens": {
"anyOf": [
{
"type": "integer",
"minimum": 0.0
},
{
"type": "null"
}
],
"title": "Max Tokens",
"description": "The maximum number of tokens to generate.",
"default": 16
},
"min_tokens": {
"type": "integer",
"minimum": 0.0,
"title": "Min Tokens",
"description": "The minimum number of tokens to generate. It may return fewer tokens if another condition is met (e.g. max_tokens, stop).",
"default": 0
},
"temperature": {
"type": "number",
"title": "Temperature",
"description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
"default": 0.8
},
"top_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Top P",
"description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.",
"default": 0.95
},
"min_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Min P",
"description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out.",
"default": 0.05
},
"echo": {
"type": "boolean",
"title": "Echo",
"description": "Whether to echo the prompt in the generated text. Useful for chatbots.",
"default": false
},
"stop": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"title": "Stop",
"description": "A list of tokens at which to stop generation. If None, no stop tokens are used."
},
"stream": {
"type": "boolean",
"title": "Stream",
"description": "Whether to stream the results as they are generated. Useful for chatbots.",
"default": false
},
"stream_options": {
"anyOf": [
{
"$ref": "#/components/schemas/StreamOptions"
},
{
"type": "null"
}
],
"description": "Options for streaming response. Only set this when you set stream: true."
},
"logprobs": {
"anyOf": [
{
"type": "integer",
"minimum": 0.0
},
{
"type": "null"
}
],
"title": "Logprobs",
"description": "The number of logprobs to generate. If None, no logprobs are generated."
},
"presence_penalty": {
"anyOf": [
{
"type": "number",
"maximum": 2.0,
"minimum": -2.0
},
{
"type": "null"
}
],
"title": "Presence Penalty",
"description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
"default": 0.0
},
"frequency_penalty": {
"anyOf": [
{
"type": "number",
"maximum": 2.0,
"minimum": -2.0
},
{
"type": "null"
}
],
"title": "Frequency Penalty",
"description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
"default": 0.0
},
"logit_bias": {
"anyOf": [
{
"additionalProperties": {
"type": "number"
},
"type": "object"
},
{
"type": "null"
}
],
"title": "Logit Bias"
},
"seed": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Seed"
},
"model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model",
"description": "The model to use for generating completions."
},
"n": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "N",
"default": 1
},
"best_of": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Best Of",
"default": 1
},
"user": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "User"
},
"top_k": {
"type": "integer",
"minimum": 0.0,
"title": "Top K",
"description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.",
"default": 40
},
"repeat_penalty": {
"type": "number",
"minimum": 0.0,
"title": "Repeat Penalty",
"description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.",
"default": 1.1
},
"logit_bias_type": {
"anyOf": [
{
"type": "string",
"enum": ["input_ids", "tokens"]
},
{
"type": "null"
}
],
"title": "Logit Bias Type"
},
"mirostat_mode": {
"type": "integer",
"maximum": 2.0,
"minimum": 0.0,
"title": "Mirostat Mode",
"description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)",
"default": 0
},
"mirostat_tau": {
"type": "number",
"maximum": 10.0,
"minimum": 0.0,
"title": "Mirostat Tau",
"description": "Mirostat target entropy, i.e. the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text",
"default": 5.0
},
"mirostat_eta": {
"type": "number",
"maximum": 1.0,
"minimum": 0.001,
"title": "Mirostat Eta",
"description": "Mirostat learning rate",
"default": 0.1
},
"grammar": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Grammar"
}
},
"type": "object",
"title": "CreateCompletionRequest",
"examples": [
{
"prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n",
"stop": ["\n", "###"]
}
]
},
"CreateCompletionResponse": {
"properties": {
"id": {
"type": "string",
"title": "Id"
},
"object": {
"type": "string",
"const": "text_completion",
"title": "Object"
},
"created": {
"type": "integer",
"title": "Created"
},
"model": {
"type": "string",
"title": "Model"
},
"choices": {
"items": {
"$ref": "#/components/schemas/CompletionChoice"
},
"type": "array",
"title": "Choices"
},
"usage": {
"$ref": "#/components/schemas/CompletionUsage"
}
},
"type": "object",
"required": ["id", "object", "created", "model", "choices"],
"title": "CreateCompletionResponse"
},
"CreateEmbeddingRequest": {
"properties": {
"model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model",
"description": "The model to use for generating completions."
},
"input": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
}
],
"title": "Input",
"description": "The input to embed."
},
"user": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "User"
}
},
"type": "object",
"required": ["input"],
"title": "CreateEmbeddingRequest",
"examples": [
{
"input": "The food was delicious and the waiter..."
}
]
},
"DetokenizeInputRequest": {
"properties": {
"model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model",
"description": "The model to use for generating completions."
},
"tokens": {
"items": {
"type": "integer"
},
"type": "array",
"title": "Tokens",
"description": "A list of toekns to detokenize."
}
},
"type": "object",
"required": ["tokens"],
"title": "DetokenizeInputRequest",
"example": [
{
"tokens": [123, 321, 222]
}
]
},
"DetokenizeInputResponse": {
"properties": {
"text": {
"type": "string",
"title": "Text",
"description": "The detokenized text."
}
},
"type": "object",
"required": ["text"],
"title": "DetokenizeInputResponse",
"example": {
"text": "How many tokens in this query?"
}
},
"HTTPValidationError": {
"properties": {
"detail": {
"items": {
"$ref": "#/components/schemas/ValidationError"
},
"type": "array",
"title": "Detail"
}
},
"type": "object",
"title": "HTTPValidationError"
},
"ModelData": {
"properties": {
"id": {
"type": "string",
"title": "Id"
},
"object": {
"type": "string",
"const": "model",
"title": "Object"
},
"owned_by": {
"type": "string",
"title": "Owned By"
},
"permissions": {
"items": {
"type": "string"
},
"type": "array",
"title": "Permissions"
}
},
"type": "object",
"required": ["id", "object", "owned_by", "permissions"],
"title": "ModelData"
},
"ModelList": {
"properties": {
"object": {
"type": "string",
"const": "list",
"title": "Object"
},
"data": {
"items": {
"$ref": "#/components/schemas/ModelData"
},
"type": "array",
"title": "Data"
}
},
"type": "object",
"required": ["object", "data"],
"title": "ModelList"
},
"StreamOptions": {
"properties": {
"include_usage": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"title": "Include Usage"
}
},
"type": "object",
"required": ["include_usage"],
"title": "StreamOptions"
},
"TokenizeInputCountResponse": {
"properties": {
"count": {
"type": "integer",
"title": "Count",
"description": "The number of tokens in the input."
}
},
"type": "object",
"required": ["count"],
"title": "TokenizeInputCountResponse",
"example": {
"count": 5
}
},
"TokenizeInputRequest": {
"properties": {
"model": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model",
"description": "The model to use for generating completions."
},
"input": {
"type": "string",
"title": "Input",
"description": "The input to tokenize."
}
},
"type": "object",
"required": ["input"],
"title": "TokenizeInputRequest",
"examples": [
{
"input": "How many tokens in this query?"
}
]
},
"TokenizeInputResponse": {
"properties": {
"tokens": {
"items": {
"type": "integer"
},
"type": "array",
"title": "Tokens",
"description": "A list of tokens."
}
},
"type": "object",
"required": ["tokens"],
"title": "TokenizeInputResponse",
"example": {
"tokens": [123, 321, 222]
}
},
"ValidationError": {
"properties": {
"loc": {
"items": {
"anyOf": [
{
"type": "string"
},
{
"type": "integer"
}
]
},
"type": "array",
"title": "Location"
},
"msg": {
"type": "string",
"title": "Message"
},
"type": {
"type": "string",
"title": "Error Type"
}
},
"type": "object",
"required": ["loc", "msg", "type"],
"title": "ValidationError"
}
},
"securitySchemes": {
"HTTPBearer": {
"type": "http",
"scheme": "bearer"
}
}
}
}
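This spec describes the OpenAI-compatible REST API exposed by the inference servers. As a hedged illustration of the /v1/chat/completions request shape defined by CreateChatCompletionRequest, here is a minimal TypeScript sketch; the base URL is a placeholder for whatever address the local inference server is listening on, and an Authorization bearer token is only needed if the server enforces the HTTPBearer scheme declared above:

```typescript
// Minimal sketch of a chat completion request against the schema above.
// The base URL is a placeholder; substitute the address of the local inference server.
const baseUrl = 'http://localhost:8000';

async function chat(question: string): Promise<string | undefined> {
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: question },
      ],
      temperature: 0.8, // schema default
      stream: false, // set to true to receive text/event-stream chunks instead
    }),
  });
  // Non-streaming responses follow CreateChatCompletionResponse.
  const completion = await response.json();
  return completion.choices?.[0]?.message?.content;
}
```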
================================================
FILE: packages/backend/src/extension.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import type { ExtensionContext } from '@podman-desktop/api';
import { activate, deactivate } from './extension';
import { Studio } from './studio';
vi.mock('./studio');
beforeEach(() => {
vi.clearAllMocks();
});
test('check we call activate method on studio instance', async () => {
const fakeContext = {} as unknown as ExtensionContext;
await activate(fakeContext);
// expect the activate method to be called on the studio mock
expect(Studio.prototype.activate).toBeCalledTimes(1);
// no call on deactivate
expect(Studio.prototype.deactivate).not.toBeCalled();
});
test('check we call deactivate method on studio instance ', async () => {
await deactivate();
// expect the deactivate method to be called on the studio mock
expect(Studio.prototype.deactivate).toBeCalledTimes(1);
// no call on activate
expect(Studio.prototype.activate).not.toBeCalled();
});
================================================
FILE: packages/backend/src/extension.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { ExtensionContext } from '@podman-desktop/api';
import { Studio } from './studio';
let studio: Studio | undefined;
export async function activate(extensionContext: ExtensionContext): Promise<void> {
studio = new Studio(extensionContext);
await studio?.activate();
}
export async function deactivate(): Promise<void> {
await studio?.deactivate();
}
================================================
FILE: packages/backend/src/instructlab-api-impl.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { InstructlabAPI } from '@shared/InstructlabAPI';
import type { InstructlabManager } from './managers/instructlab/instructlabManager';
import type { InstructlabSession } from '@shared/models/instructlab/IInstructlabSession';
import type { InstructlabContainerConfiguration } from '@shared/models/instructlab/IInstructlabContainerConfiguration';
import { navigation } from '@podman-desktop/api';
export class InstructlabApiImpl implements InstructlabAPI {
constructor(private instructlabManager: InstructlabManager) {}
async getIsntructlabSessions(): Promise<InstructlabSession[]> {
return this.instructlabManager.getSessions();
}
requestCreateInstructlabContainer(config: InstructlabContainerConfiguration): Promise<void> {
return this.instructlabManager.requestCreateInstructlabContainer(config);
}
routeToInstructLabContainerTerminal(containerId: string): Promise<void> {
return navigation.navigateToContainerTerminal(containerId);
}
getInstructlabContainerId(): Promise<string | undefined> {
return this.instructlabManager.getInstructLabContainer();
}
}
================================================
FILE: packages/backend/src/llama-stack-api-impl.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { navigation } from '@podman-desktop/api';
import type { LlamaStackAPI } from '@shared/LlamaStackAPI';
import type { LlamaStackContainerConfiguration } from '@shared/models/llama-stack/LlamaStackContainerConfiguration';
import type { LlamaStackManager } from './managers/llama-stack/llamaStackManager';
import type { LlamaStackContainers } from '@shared/models/llama-stack/LlamaStackContainerInfo';
export class LlamaStackApiImpl implements LlamaStackAPI {
constructor(private llamaStackManager: LlamaStackManager) {}
requestcreateLlamaStackContainerss(config: LlamaStackContainerConfiguration): Promise<void> {
return this.llamaStackManager.requestcreateLlamaStackContainerss(config);
}
routeToLlamaStackContainerTerminal(containerId: string): Promise<void> {
return navigation.navigateToContainerTerminal(containerId);
}
getLlamaStackContainersInfo(): Promise<LlamaStackContainers | undefined> {
return this.llamaStackManager.getLlamaStackContainers();
}
}
================================================
FILE: packages/backend/src/managers/GPUManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { expect, test, vi, beforeEach } from 'vitest';
import { GPUManager } from './GPUManager';
import { graphics, type Systeminformation } from 'systeminformation';
import { GPUVendor } from '@shared/models/IGPUInfo';
import type { RpcExtension } from '@shared/messages/MessageProxy';
vi.mock('../utils/inferenceUtils', () => ({
getProviderContainerConnection: vi.fn(),
getImageInfo: vi.fn(),
}));
vi.mock('@podman-desktop/api', async () => {
return {
env: {
isWindows: false,
},
};
});
vi.mock('systeminformation', () => ({
graphics: vi.fn(),
}));
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
});
test('post constructor should have no items', () => {
const manager = new GPUManager(rpcExtensionMock);
expect(manager.getAll().length).toBe(0);
});
test('no controller should return empty array', async () => {
vi.mocked(graphics).mockResolvedValue({
controllers: [],
displays: [],
});
const manager = new GPUManager(rpcExtensionMock);
expect(await manager.collectGPUs()).toHaveLength(0);
});
test('intel controller should return intel vendor', async () => {
vi.mocked(graphics).mockResolvedValue({
controllers: [
{
vendor: 'Intel Corporation',
model: 'intel model',
vram: 1024,
} as unknown as Systeminformation.GraphicsControllerData,
],
displays: [],
});
const manager = new GPUManager(rpcExtensionMock);
expect(await manager.collectGPUs()).toStrictEqual([
{
vendor: GPUVendor.INTEL,
model: 'intel model',
vram: 1024,
},
]);
});
test('NVIDIA controller should return NVIDIA vendor', async () => {
vi.mocked(graphics).mockResolvedValue({
controllers: [
{
vendor: 'NVIDIA',
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
} as unknown as Systeminformation.GraphicsControllerData,
],
displays: [],
});
const manager = new GPUManager(rpcExtensionMock);
expect(await manager.collectGPUs()).toStrictEqual([
{
vendor: GPUVendor.NVIDIA,
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
},
]);
});
test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
vi.mocked(graphics).mockResolvedValue({
controllers: [
{
vendor: 'NVIDIA Corporation',
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
} as unknown as Systeminformation.GraphicsControllerData,
],
displays: [],
});
const manager = new GPUManager(rpcExtensionMock);
expect(await manager.collectGPUs()).toStrictEqual([
{
vendor: GPUVendor.NVIDIA,
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
},
]);
});
================================================
FILE: packages/backend/src/managers/GPUManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { type Disposable } from '@podman-desktop/api';
import { GPUVendor, type IGPUInfo } from '@shared/models/IGPUInfo';
import { Publisher } from '../utils/Publisher';
import { graphics } from 'systeminformation';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_GPUS_UPDATE } from '@shared/Messages';
/**
* @experimental
*/
export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
#gpus: IGPUInfo[];
constructor(rpcExtension: RpcExtension) {
super(rpcExtension, MSG_GPUS_UPDATE, () => this.getAll());
// init properties
this.#gpus = [];
}
dispose(): void {}
getAll(): IGPUInfo[] {
return this.#gpus;
}
async collectGPUs(): Promise<IGPUInfo[]> {
const { controllers } = await graphics();
return controllers.map(controller => ({
vendor: this.getVendor(controller.vendor),
model: controller.model,
vram: controller.vram ?? undefined,
}));
}
protected getVendor(raw: string): GPUVendor {
switch (raw) {
case 'Intel Corporation':
return GPUVendor.INTEL;
case 'NVIDIA':
case 'NVIDIA Corporation':
return GPUVendor.NVIDIA;
case 'Apple':
return GPUVendor.APPLE;
default:
return GPUVendor.UNKNOWN;
}
}
}
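
A minimal usage sketch (not part of the repository), assuming an RpcExtension instance is already wired up; the vram value is whatever systeminformation reports for the controller.

import type { RpcExtension } from '@shared/messages/MessageProxy';
import { GPUManager } from './managers/GPUManager';

async function logGPUs(rpcExtension: RpcExtension): Promise<void> {
  const manager = new GPUManager(rpcExtension);
  const gpus = await manager.collectGPUs();
  for (const gpu of gpus) {
    // vendor is one of the GPUVendor values mapped by getVendor above
    console.log(`${gpu.vendor}: ${gpu.model} (vram: ${gpu.vram ?? 'unknown'})`);
  }
}
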
================================================
FILE: packages/backend/src/managers/SnippetManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { SnippetManager } from './SnippetManager';
import type { TelemetryLogger } from '@podman-desktop/api';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_SUPPORTED_LANGUAGES_UPDATE } from '@shared/Messages';
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
const telemetryMock = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
});
test('expect init to notify webview', () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_SUPPORTED_LANGUAGES_UPDATE, manager.getLanguageList());
});
test('expect postman-code-generators to have many languages available.', () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
expect(manager.getLanguageList().length).toBeGreaterThan(0);
});
test('expect postman-code-generators to have nodejs supported.', () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
const languages = manager.getLanguageList();
const nodejs = languages.find(language => language.key === 'nodejs');
expect(nodejs).toBeDefined();
expect(nodejs?.variants.length).toBeGreaterThan(0);
const native = nodejs?.variants.find(variant => variant.key === 'Request');
expect(native).toBeDefined();
});
test('expect postman-code-generators to generate proper nodejs native code', async () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
const snippet = await manager.generate(
{
url: 'http://localhost:8080',
},
'nodejs',
'Request',
);
expect(snippet).toBe(`var request = require('request');
var options = {
'method': 'GET',
'url': 'http://localhost:8080',
'headers': {
}
};
request(options, function (error, response) {
if (error) throw new Error(error);
console.log(response.body);
});
`);
});
test('expect snippet manager to have Quarkus Langchain4J supported.', () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
const languages = manager.getLanguageList();
const java = languages.find(language => language.key === 'java');
expect(java).toBeDefined();
expect(java?.variants.length).toBeGreaterThan(0);
const quarkus_langchain4j = java?.variants.find(variant => variant.key === 'Quarkus Langchain4J');
expect(quarkus_langchain4j).toBeDefined();
});
test('expect new variant to replace existing one if same name', () => {
const manager = new SnippetManager(rpcExtensionMock, telemetryMock);
manager.init();
const languages = manager.getLanguageList();
const java = languages.find(language => language.key === 'java');
expect(java).toBeDefined();
expect(java?.variants.length).toBeGreaterThan(0);
if (!java) throw new Error('undefined java');
const oldVariantsNumber = java.variants.length;
manager.addVariant('java', java.variants[0].key, vi.fn());
const languages_updated = manager.getLanguageList();
const java_updated = languages_updated.find(language => language.key === 'java');
expect(java_updated).toBeDefined();
expect(java_updated?.variants.length).equals(oldVariantsNumber);
});
================================================
FILE: packages/backend/src/managers/SnippetManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { Disposable, TelemetryLogger } from '@podman-desktop/api';
import { getLanguageList, convert, type Language } from 'postman-code-generators';
import { Request } from 'postman-collection';
import { Publisher } from '../utils/Publisher';
import type { RequestOptions } from '@shared/models/RequestOptions';
import { quarkusLangchain4Jgenerator } from './snippets/quarkus-snippet';
import { javaOkHttpGenerator } from './snippets/java-okhttp-snippet';
import { pythonLangChainGenerator } from './snippets/python-langchain-snippet';
import { MSG_SUPPORTED_LANGUAGES_UPDATE } from '@shared/Messages';
import type { RpcExtension } from '@shared/messages/MessageProxy';
type Generator = (requestOptions: RequestOptions) => Promise<string>;
export class SnippetManager extends Publisher<Language[]> implements Disposable {
#languages: Language[];
#additionalGenerators: Map<string, Generator>;
constructor(
rpcExtension: RpcExtension,
private telemetry: TelemetryLogger,
) {
super(rpcExtension, MSG_SUPPORTED_LANGUAGES_UPDATE, () => this.getLanguageList());
this.#languages = [];
this.#additionalGenerators = new Map();
}
addVariant(key: string, variant: string, generator: Generator): void {
const original = this.#languages;
const language = original.find((lang: Language) => lang.key === key);
if (language) {
if (!language.variants.find(v => v.key === variant)) {
language.variants.push({ key: variant });
}
this.#additionalGenerators.set(`${key}/${variant}`, generator);
}
}
getLanguageList(): Language[] {
return this.#languages;
}
async generate(requestOptions: RequestOptions, language: string, variant: string): Promise<string> {
this.telemetry.logUsage('snippet.generate', { language: language, variant: variant });
const generator = this.#additionalGenerators.get(`${language}/${variant}`);
if (generator) {
return generator(requestOptions);
}
return new Promise((resolve, reject) => {
const request = new Request(requestOptions);
convert(language, variant, request, {}, (error: unknown, snippet: string | undefined) => {
if (error) {
reject(error);
return;
} else if (snippet === undefined) {
throw new Error('undefined snippet');
}
resolve(snippet);
});
});
}
init(): void {
this.#languages = getLanguageList();
this.addVariant('java', 'Quarkus Langchain4J', quarkusLangchain4Jgenerator);
this.addVariant('java', 'OkHttp', javaOkHttpGenerator);
this.addVariant('python', 'Python LangChain', pythonLangChainGenerator);
// Notify the publisher
this.notify();
}
dispose(): void {}
}
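
A minimal sketch (not part of the repository) of how the variant mechanism composes: generators registered through addVariant take precedence over postman-code-generators for their language/variant pair, since generate checks the additional generators first. The `fetchGenerator` variant below is purely hypothetical; the request options mirror the ones used in the spec above.

import type { RequestOptions } from '@shared/models/RequestOptions';
import type { SnippetManager } from './managers/SnippetManager';

// hypothetical generator, used only for this example
const fetchGenerator = async (options: RequestOptions): Promise<string> => `await fetch('${options.url}');`;

async function demo(manager: SnippetManager): Promise<string> {
  manager.init(); // loads the postman-code-generators language list and registers the built-in variants
  manager.addVariant('nodejs', 'Fetch (example)', fetchGenerator);
  return manager.generate({ url: 'http://localhost:8080' }, 'nodejs', 'Fetch (example)');
}
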
================================================
FILE: packages/backend/src/managers/TaskRunner.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../registries/TaskRegistry';
import { TaskRunner } from './TaskRunner';
import type { TaskRunnerTools } from '../models/TaskRunner';
import type { Task } from '@shared/models/ITask';
const taskRegistry = {
createTask: vi.fn(),
updateTask: vi.fn(),
getTasksByLabels: vi.fn(),
} as unknown as TaskRegistry;
const runner = vi.fn<(tools: TaskRunnerTools) => Promise<void>>();
let taskRunner: TaskRunner;
beforeEach(() => {
vi.resetAllMocks();
taskRunner = new TaskRunner(taskRegistry);
});
test('runner terminates with no successLabel', async () => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
runner.mockResolvedValue();
const labels = {
label1: 'value1',
label2: 'value2',
};
await taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
errorMsg: err => `an error: ${err}`,
},
runner,
);
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Loading...',
state: 'success',
});
});
test('runner terminates with successLabel', async () => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
runner.mockResolvedValue();
const labels = {
label1: 'value1',
label2: 'value2',
};
await taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
successLabel: 'Success!!',
errorMsg: err => `an error: ${err}`,
},
runner,
);
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Success!!',
state: 'success',
});
});
test('runner throws with no errorLabel', async () => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
runner.mockRejectedValue('something goes wrong');
const labels = {
label1: 'value1',
label2: 'value2',
};
await expect(() =>
taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
errorMsg: err => `an error: ${err}`,
},
runner,
),
).rejects.toThrow();
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Loading...',
state: 'error',
error: 'an error: something goes wrong',
});
});
test('runner throws with errorLabel', async () => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
runner.mockRejectedValue('something goes wrong');
const labels = {
label1: 'value1',
label2: 'value2',
};
await expect(() =>
taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
errorLabel: 'Failed :(',
errorMsg: err => `an error: ${err}`,
},
runner,
),
).rejects.toThrow();
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Failed :(',
state: 'error',
error: 'an error: something goes wrong',
});
});
test('updateLabels', async () => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
runner.mockImplementation(async ({ updateLabels }) => {
updateLabels(labels => ({ ...labels, newLabel: 'newValue' }));
});
const labels = {
label1: 'value1',
label2: 'value2',
};
await taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
errorMsg: err => `an error: ${err}`,
},
runner,
);
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Loading...',
state: 'success',
labels: {
label1: 'value1',
label2: 'value2',
newLabel: 'newValue',
},
});
expect(taskRegistry.updateTask).toHaveBeenCalledWith({
id: 'task1',
name: 'Loading...',
state: 'success',
labels: {
label1: 'value1',
label2: 'value2',
newLabel: 'newValue',
},
});
});
test.each<{ failFast: boolean }>([
{
failFast: true,
},
{
failFast: false,
},
])('failFastSubtasks $failFast', async ({ failFast }) => {
vi.mocked(taskRegistry.createTask).mockReturnValue({
id: 'task1',
name: 'Loading...',
state: 'loading',
});
const otherTasks: Task[] = [
{
id: 'subtask1',
name: 'Sub task 1',
state: 'loading',
},
{
id: 'subtask2',
name: 'Sub task 2',
state: 'loading',
},
{
id: 'subtask3',
name: 'Sub task 3',
state: 'error',
},
];
vi.mocked(taskRegistry.getTasksByLabels).mockReturnValue(otherTasks);
runner.mockRejectedValue('something goes wrong');
const labels = {
label1: 'value1',
label2: 'value2',
};
await expect(() =>
taskRunner.runAsTask(
labels,
{
loadingLabel: 'Loading...',
errorMsg: err => `an error: ${err}`,
failFastSubtasks: failFast,
},
runner,
),
).rejects.toThrow();
expect(taskRegistry.createTask).toHaveBeenCalledWith('Loading...', 'loading', labels);
if (failFast) {
expect(taskRegistry.updateTask).toHaveBeenCalledTimes(3);
expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(1, { ...otherTasks[0], state: 'error' });
expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(2, { ...otherTasks[1], state: 'error' });
expect(taskRegistry.updateTask).toHaveBeenNthCalledWith(3, {
id: 'task1',
name: 'Loading...',
state: 'error',
error: 'an error: something goes wrong',
});
} else {
expect(taskRegistry.updateTask).toHaveBeenCalledExactlyOnceWith({
id: 'task1',
name: 'Loading...',
state: 'error',
error: 'an error: something goes wrong',
});
}
});
================================================
FILE: packages/backend/src/managers/TaskRunner.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RunAsTaskOptions, TaskRunnerTools } from '../models/TaskRunner';
import type { TaskRegistry } from '../registries/TaskRegistry';
export class TaskRunner {
constructor(private taskRegistry: TaskRegistry) {}
async runAsTask<T>(
labels: Record<string, string>,
options: RunAsTaskOptions,
run: (tools: TaskRunnerTools) => Promise<T>,
): Promise<T> {
const tools = {
updateLabels: (f: (labels: Record<string, string>) => Record<string, string>): void => {
task.labels = f(labels);
this.taskRegistry.updateTask(task);
},
};
const task = this.taskRegistry.createTask(options.loadingLabel, 'loading', labels);
try {
const result = await run(tools);
task.state = 'success';
if (options.successLabel) {
task.name = options.successLabel;
}
return result;
} catch (err: unknown) {
task.state = 'error';
task.error = options.errorMsg(err);
if (options.errorLabel) {
task.name = options.errorLabel;
}
if (options.failFastSubtasks) {
this.failFastSubtasks(labels);
}
throw err;
} finally {
task.progress = undefined;
this.taskRegistry.updateTask(task);
}
}
private failFastSubtasks(labels: Record<string, string>): void {
const tasks = this.taskRegistry.getTasksByLabels(labels);
// Only flip the tasks that are still in loading state to error
tasks
.filter(t => t.state === 'loading')
.forEach(t => {
this.taskRegistry.updateTask({
...t,
state: 'error',
});
});
}
}
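
A minimal sketch (not part of the repository) of how runAsTask is typically driven, assuming the generic signature reconstructed above; the labels, messages, and taskRegistry instance are placeholders for illustration.

import type { TaskRegistry } from '../registries/TaskRegistry';
import { TaskRunner } from './TaskRunner';

async function demo(taskRegistry: TaskRegistry): Promise<string> {
  const taskRunner = new TaskRunner(taskRegistry);
  return taskRunner.runAsTask<string>(
    { trackingId: 'example' }, // labels shared by the task and any subtasks
    {
      loadingLabel: 'Doing some work...',
      successLabel: 'Work done',
      errorMsg: err => `work failed: ${String(err)}`,
      failFastSubtasks: true, // flip still-loading subtasks to error if the runner throws
    },
    async ({ updateLabels }) => {
      updateLabels(labels => ({ ...labels, step: 'finished' }));
      return 'some result';
    },
  );
}
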
================================================
FILE: packages/backend/src/managers/apiServer.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
/* eslint-disable sonarjs/no-nested-functions */
import { afterEach, assert, beforeEach, describe, expect, test, vi } from 'vitest';
import { ApiServer, PREFERENCE_RANDOM_PORT } from './apiServer';
import request from 'supertest';
import type * as podmanDesktopApi from '@podman-desktop/api';
import path from 'node:path';
import type { Server } from 'node:http';
import type { ModelsManager } from './modelsManager';
import type { EventEmitter } from 'node:events';
import { once } from 'node:events';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import type { AddressInfo } from 'node:net';
import type { CatalogManager } from './catalogManager';
import type { Downloader } from '../utils/downloader';
import type { ProgressEvent } from '../models/baseEvent';
import type { InferenceManager } from './inference/inferenceManager';
import type { ContainerHealthy, ContainerRegistry } from '../registries/ContainerRegistry';
import type { InferenceServer } from '@shared/models/IInference';
import OpenAI from 'openai';
import type { ChatCompletion, ChatCompletionChunk } from 'openai/resources';
import { Stream } from 'openai/streaming';
vi.mock('openai', () => {
const OpenAI = vi.fn();
OpenAI.prototype = {
chat: {
completions: {
create: vi.fn(),
},
},
};
return { default: OpenAI };
});
class TestApiServer extends ApiServer {
public override getListener(): Server | undefined {
return super.getListener();
}
}
const extensionContext = {} as unknown as podmanDesktopApi.ExtensionContext;
let server: TestApiServer;
const modelsManager = {
getModelsInfo: vi.fn(),
isModelOnDisk: vi.fn(),
createDownloader: vi.fn(),
getLocalModelsFromDisk: vi.fn(),
sendModelsInfo: vi.fn(),
} as unknown as ModelsManager;
const catalogManager = {
getModelByName: vi.fn(),
} as unknown as CatalogManager;
const inferenceManager = {
getServers: vi.fn(),
createInferenceServer: vi.fn(),
startInferenceServer: vi.fn(),
} as unknown as InferenceManager;
const configurationRegistry = {
getExtensionConfiguration: () => {
return {
apiPort: PREFERENCE_RANDOM_PORT,
};
},
} as unknown as ConfigurationRegistry;
const containerRegistry = {
onHealthyContainerEvent: vi.fn(),
} as unknown as ContainerRegistry;
beforeEach(async () => {
vi.clearAllMocks();
server = new TestApiServer(
extensionContext,
modelsManager,
catalogManager,
inferenceManager,
configurationRegistry,
containerRegistry,
);
vi.spyOn(server, 'getSpecFile').mockReturnValue(path.join(__dirname, '../../../../api/openapi.yaml'));
vi.spyOn(server, 'getPackageFile').mockReturnValue(path.join(__dirname, '../../../../package.json'));
await server.init();
await new Promise(resolve => setTimeout(resolve, 0)); // wait for random port to be set
});
afterEach(async () => {
server.dispose();
await once(server.getListener() as EventEmitter, 'close');
});
test('/spec endpoint', async () => {
expect(server.getListener()).toBeDefined();
const res = await request(server.getListener()!)
.get('/spec')
.expect(200)
.expect('Content-Type', 'application/yaml; charset=utf-8');
expect(res.text).toMatch(/^openapi:/);
});
test('/spec endpoint when spec file is not found', async () => {
expect(server.getListener()).toBeDefined();
vi.spyOn(server, 'getSpecFile').mockReturnValue(path.join(__dirname, '../../../../api/openapi-notfound.yaml'));
const res = await request(server.getListener()!).get('/spec').expect(500);
expect(res.body.message).toEqual('unable to get spec');
});
test('/spec endpoint when getting spec file fails', async () => {
expect(server.getListener()).toBeDefined();
vi.spyOn(server, 'getSpecFile').mockImplementation(() => {
throw new Error('an error getting spec file');
});
const res = await request(server.getListener()!).get('/spec').expect(500);
expect(res.body.message).toEqual('unable to get spec');
expect(res.body.errors[0]).toEqual('an error getting spec file');
});
test('/api/version endpoint', async () => {
expect(server.getListener()).toBeDefined();
const res = await request(server.getListener()!)
.get('/api/version')
.expect(200)
.expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.version).toBeDefined();
});
test('/api/version endpoint when package.json file is not found', async () => {
expect(server.getListener()).toBeDefined();
vi.spyOn(server, 'getPackageFile').mockReturnValue(path.join(__dirname, '../../../../package-notfound.json'));
const res = await request(server.getListener()!).get('/api/version').expect(500);
expect(res.body.message).toEqual('unable to get version');
});
test('/api/version endpoint when getting package.json file fails', async () => {
expect(server.getListener()).toBeDefined();
vi.spyOn(server, 'getPackageFile').mockImplementation(() => {
throw new Error('an error getting package file');
});
const res = await request(server.getListener()!).get('/api/version').expect(500);
expect(res.body.message).toEqual('unable to get version');
expect(res.body.errors[0]).toEqual('an error getting package file');
});
test('/api/version endpoint with unexpected param', async () => {
expect(server.getListener()).toBeDefined();
const res = await request(server.getListener()!).get('/api/version?wrongParam').expect(400);
expect(res.body.message).toEqual(`Unknown query parameter 'wrongParam'`);
});
test('/api/wrongEndpoint', async () => {
expect(server.getListener()).toBeDefined();
const res = await request(server.getListener()!).get('/api/wrongEndpoint').expect(404);
expect(res.body.message).toEqual('not found');
});
test('/', async () => {
expect(server.getListener()).toBeDefined();
await request(server.getListener()!).get('/').expect(200);
});
test('/api/tags', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(modelsManager.getModelsInfo).mockReturnValue([]);
await request(server.getListener()!).get('/api/tags').expect(200);
});
test('/api/tags returns error', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(modelsManager.getModelsInfo).mockRejectedValue({});
const res = await request(server.getListener()!).get('/api/tags').expect(500);
expect(res.body.message).toEqual('unable to get models');
});
test('/api/tags returns ok', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(modelsManager.getModelsInfo).mockReturnValue([
{
id: 'modelId',
name: 'model-name',
description: 'a description',
},
]);
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true);
const res = await request(server.getListener()!).get('/api/tags').expect(200);
expect(res.body).toBeDefined();
expect(res.body.models).toBeDefined();
expect(res.body.models[0]).toMatchObject({
name: 'model-name',
model: 'model-name',
});
});
test('/api-docs/9000 returns swagger UI', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(modelsManager.getModelsInfo).mockRejectedValue({});
const listener = server.getListener();
if (!listener) {
assert.fail('listener is not defined');
}
const response = await request(listener).get('/api-docs/9000/').expect(200);
expect(response.status).toBe(200);
// Ensure it returns the Swagger UI page
expect(response.text).toContain('Swagger UI');
});
test('verify listening on all interfaces', async () => {
expect(server.getListener()).toBeDefined();
expect((server.getListener()?.address() as AddressInfo).address).toEqual('0.0.0.0');
});
test('/api/pull returns an error if no body is passed', async () => {
expect(server.getListener()).toBeDefined();
await request(server.getListener()!).post('/api/pull').expect(415);
});
describe.each([undefined, true, false])('/api/pull endpoint, stream is %o', stream => {
test('/api/pull returns an error if the model is not known', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockImplementation(() => {
throw new Error('model unknown');
});
const req = request(server.getListener()!).post('/api/pull').send({ model: 'unknown-model-name', stream });
if (stream === false) {
const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.error).toEqual('pull model manifest: file does not exist');
} else {
const res = await req.expect(200);
const lines = res.text.split('\n');
expect(lines.length).toEqual(3);
expect(lines[0]).toEqual('{"status":"pulling manifest"}');
expect(lines[1]).toEqual('{"error":"pull model manifest: file does not exist"}');
expect(lines[2]).toEqual('');
}
});
test('/api/pull returns success if model already downloaded', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockReturnValue({
id: 'modelId',
name: 'model-name',
description: 'a description',
});
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true);
const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.status).toEqual('success');
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(3);
expect(lines[0]).toEqual('{"status":"pulling manifest"}');
expect(lines[1]).toEqual('{"status":"success"}');
expect(lines[2]).toEqual('');
}
});
test('/api/pull downloads model and returns success', async () => {
const getLocalModelsSpy = vi.spyOn(modelsManager, 'getLocalModelsFromDisk').mockResolvedValue();
const sendModelsInfoSpy = vi.spyOn(modelsManager, 'sendModelsInfo').mockResolvedValue();
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockReturnValue({
id: 'modelId',
name: 'model-name',
description: 'a description',
sha256: '123456',
});
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false);
vi.mocked(modelsManager.createDownloader).mockReturnValue({
perform: async (_name: string) => {},
onEvent: (listener: (e: ProgressEvent) => void) => {
listener({
status: 'progress',
id: 'model-name',
total: 100000,
value: 100000,
});
},
} as unknown as Downloader);
const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.status).toEqual('success');
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(4);
expect(lines[0]).toEqual('{"status":"pulling manifest"}');
expect(lines[1]).toEqual(
'{"status":"pulling 123456","digest":"sha256:123456","total":100000,"completed":100000000}',
);
expect(lines[2]).toEqual('{"status":"success"}');
expect(lines[3]).toEqual('');
}
expect(getLocalModelsSpy).toHaveBeenCalledTimes(1);
expect(sendModelsInfoSpy).toHaveBeenCalledTimes(1);
});
test('/api/pull should return an error if an error occurs during download', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockReturnValue({
id: 'modelId',
name: 'model-name',
description: 'a description',
sha256: '123456',
});
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false);
vi.mocked(modelsManager.createDownloader).mockReturnValue({
perform: async (_name: string) => {
await new Promise(resolve => setTimeout(resolve, 0)); // yield to the event loop before failing
throw new Error('an error');
},
onEvent: (listener: (e: ProgressEvent) => void) => {
listener({
status: 'progress',
id: 'model-name',
total: 100000,
value: 100000,
});
},
} as unknown as Downloader);
const req = request(server.getListener()!).post('/api/pull').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.error).toEqual('Error: an error');
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(4);
expect(lines[0]).toEqual('{"status":"pulling manifest"}');
expect(lines[1]).toEqual(
'{"status":"pulling 123456","digest":"sha256:123456","total":100000,"completed":100000000}',
);
expect(lines[2]).toEqual('{"error":"Error: an error"}');
expect(lines[3]).toEqual('');
}
});
});
describe.each([undefined, true, false])('stream is %o', stream => {
describe.each(['/api/chat', '/api/generate'])('%o endpoint', endpoint => {
test('returns an error if the model is not known', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockImplementation(() => {
throw new Error('model unknown');
});
const req = request(server.getListener()!).post(endpoint).send({ model: 'unknown-model-name', stream });
if (stream === false) {
const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.error).toEqual('chat: model "unknown-model-name" does not exist');
} else {
const res = await req.expect(200);
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"error":"chat: model \\"unknown-model-name\\" does not exist"}');
expect(lines[1]).toEqual('');
}
});
test('returns an error if model is not downloaded', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockReturnValue({
id: 'modelId',
name: 'model-name',
description: 'a description',
});
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(false);
const req = request(server.getListener()!).post(endpoint).send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(500).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body.error).toEqual('chat: model "model-name" not found, try pulling it first');
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"error":"chat: model \\"model-name\\" not found, try pulling it first"}');
expect(lines[1]).toEqual('');
}
});
});
describe('the model is available', () => {
const onHealthyContainerEventEmptyCallback = (): podmanDesktopApi.Disposable => {
return {
dispose: vi.fn(),
};
};
const onHealthyContainerEventNonEmptyCallback = (
fn: (e: ContainerHealthy) => void,
): podmanDesktopApi.Disposable => {
setTimeout(
() =>
fn({
id: 'container1',
}),
100,
);
return {
dispose: vi.fn(),
};
};
beforeEach(() => {
expect(server.getListener()).toBeDefined();
vi.mocked(catalogManager.getModelByName).mockReturnValue({
id: 'modelId1',
name: 'model-name',
description: 'a description',
file: {
file: 'a-file-name',
path: '/path/to/model-file',
},
});
vi.mocked(modelsManager.isModelOnDisk).mockReturnValue(true);
});
describe('the service is initially not created', async () => {
beforeEach(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([]);
});
describe('the created service is immediately healthy', () => {
beforeEach(() => {
vi.mocked(inferenceManager.createInferenceServer).mockImplementation(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'running',
health: {
Status: 'healthy',
},
} as unknown as InferenceServer,
]);
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(
onHealthyContainerEventEmptyCallback,
);
return 'container1';
});
});
test('/api/generate creates the service and returns that the model is loaded', async () => {
const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' });
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}');
expect(lines[1]).toEqual('');
}
expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce();
expect(inferenceManager.createInferenceServer).toHaveBeenCalledOnce();
});
});
describe('the created service is eventually healthy', () => {
beforeEach(() => {
vi.mocked(inferenceManager.createInferenceServer).mockImplementation(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'starting',
} as unknown as InferenceServer,
]);
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(
onHealthyContainerEventNonEmptyCallback,
);
return 'container1';
});
});
test('/api/generate creates the service and returns that the model is loaded', async () => {
const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' });
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}');
expect(lines[1]).toEqual('');
}
expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce();
expect(inferenceManager.createInferenceServer).toHaveBeenCalledOnce();
});
});
});
describe('the service is initially created but not started', async () => {
beforeEach(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'stopped',
} as unknown as InferenceServer,
]);
});
describe('the started service is immediately healthy', () => {
beforeEach(() => {
vi.mocked(inferenceManager.startInferenceServer).mockImplementation(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'running',
health: {
Status: 'healthy',
},
} as unknown as InferenceServer,
]);
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(
onHealthyContainerEventEmptyCallback,
);
});
});
test('/api/generate starts the service and returns that the model is loaded', async () => {
const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' });
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}');
expect(lines[1]).toEqual('');
}
expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce();
expect(inferenceManager.startInferenceServer).toHaveBeenCalledOnce();
});
});
describe('the started service is eventually healthy', () => {
beforeEach(() => {
vi.mocked(inferenceManager.startInferenceServer).mockImplementation(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValueOnce([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'starting',
} as unknown as InferenceServer,
]);
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(
onHealthyContainerEventNonEmptyCallback,
);
});
});
test('/api/generate starts the service and returns that the model is loaded', async () => {
const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' });
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}');
expect(lines[1]).toEqual('');
}
expect(containerRegistry.onHealthyContainerEvent).toHaveBeenCalledOnce();
expect(inferenceManager.startInferenceServer).toHaveBeenCalledOnce();
});
});
});
describe('the service is running', async () => {
beforeEach(async () => {
vi.mocked(inferenceManager.getServers).mockReturnValue([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'running',
health: {
Status: 'healthy',
},
connection: {
port: 8080,
},
} as unknown as InferenceServer,
]);
});
test('/api/generate returns that the model is loaded', async () => {
const req = request(server.getListener()!).post('/api/generate').send({ model: 'model-name', stream });
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual({ model: 'model-name', response: '', done: true, done_reason: 'load' });
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(2);
expect(lines[0]).toEqual('{"model":"model-name","response":"","done":true,"done_reason":"load"}');
expect(lines[1]).toEqual('');
}
});
describe.each([
{
endpoint: '/api/chat',
query: {
model: 'model-name',
stream,
messages: [
{
role: 'user',
content: 'what is the question?',
},
],
},
expectedNonStreamed: {
model: 'model-name',
message: { role: 'assistant', content: 'that is a good question' },
done: true,
done_reason: 'stop',
},
expectedStreamed: [
'{"model":"model-name","message":{"role":"assistant","content":"that "},"done":false}',
'{"model":"model-name","message":{"role":"assistant","content":"is "},"done":false}',
'{"model":"model-name","message":{"role":"assistant","content":"a "},"done":false}',
'{"model":"model-name","message":{"role":"assistant","content":"good "},"done":false}',
'{"model":"model-name","message":{"role":"assistant","content":"question"},"done":false}',
'{"model":"model-name","message":{"role":"assistant","content":"."},"done":true,"done_reason":"stop"}',
'',
],
},
{
endpoint: '/api/generate',
query: { model: 'model-name', stream, prompt: 'what is the question?' },
expectedNonStreamed: {
model: 'model-name',
response: 'that is a good question',
done: true,
done_reason: 'stop',
},
expectedStreamed: [
'{"model":"model-name","response":"that ","done":false}',
'{"model":"model-name","response":"is ","done":false}',
'{"model":"model-name","response":"a ","done":false}',
'{"model":"model-name","response":"good ","done":false}',
'{"model":"model-name","response":"question","done":false}',
'{"model":"model-name","response":".","done":true,"done_reason":"stop"}',
'',
],
},
])('%o endpoint', ({ endpoint, query, expectedNonStreamed, expectedStreamed }) => {
test('calls the service and replies to the prompt', async () => {
if (stream || stream === undefined) {
const chunks = [
{
choices: [
{
delta: {
content: 'that ',
},
},
],
},
{
choices: [
{
delta: {
content: 'is ',
},
},
],
},
{
choices: [
{
delta: {
content: 'a ',
},
},
],
},
{
choices: [
{
delta: {
content: 'good ',
},
},
],
},
{
choices: [
{
delta: {
content: 'question',
},
},
],
},
{
choices: [
{
delta: {
content: '.',
},
finish_reason: 'stop',
},
],
},
] as ChatCompletionChunk[];
const asyncIterator = (async function* (): AsyncGenerator<
OpenAI.Chat.Completions.ChatCompletionChunk,
void,
unknown
> {
for (const chunk of chunks) {
yield chunk;
}
})();
const response = new Stream(() => asyncIterator, new AbortController());
vi.mocked(OpenAI.prototype.chat.completions.create).mockResolvedValue(response);
} else {
vi.mocked(OpenAI.prototype.chat.completions.create).mockResolvedValue({
id: 'id1',
choices: [
{
message: {
role: 'assistant',
content: 'that is a good question',
},
},
],
} as unknown as ChatCompletion);
}
const req = request(server.getListener()!).post(endpoint).send(query);
if (stream === false) {
const res = await req.expect(200).expect('Content-Type', 'application/json; charset=utf-8');
expect(res.body).toEqual(expectedNonStreamed);
} else {
const res = await req.expect(200).expect('transfer-encoding', 'chunked');
const lines = res.text.split('\n');
expect(lines.length).toEqual(expectedStreamed.length);
for (const [i, line] of lines.entries()) {
expect(line).toEqual(expectedStreamed[i]);
}
}
});
});
});
});
});
describe('/api/ps', () => {
test('returns an error if the model is not known', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(inferenceManager.getServers).mockImplementation(() => {
throw new Error('model unknown');
});
const res = await request(server.getListener()!).get('/api/ps').expect(500);
expect(res.body).toMatchObject({ message: 'unable to ps' });
});
test('returns empty result if no servers', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(inferenceManager.getServers).mockReturnValue([]);
const res = await request(server.getListener()!).get('/api/ps').expect(200);
expect(res.body).toEqual({ models: [] });
});
test('returns empty result if server is stopped', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(inferenceManager.getServers).mockReturnValue([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'stopped',
} as unknown as InferenceServer,
]);
const res = await request(server.getListener()!).get('/api/ps').expect(200);
expect(res.body).toEqual({ models: [] });
});
test('returns result if server is started', async () => {
expect(server.getListener()).toBeDefined();
vi.mocked(inferenceManager.getServers).mockReturnValue([
{
models: [
{
id: 'modelId1',
name: 'model-name',
description: 'model 1',
memory: 1_000_000,
},
],
container: {
engineId: 'engine1',
containerId: 'container1',
},
status: 'running',
} as unknown as InferenceServer,
]);
const res = await request(server.getListener()!).get('/api/ps').expect(200);
expect(res.body).toEqual({
models: [
{
name: 'model-name',
model: 'model-name',
size: 1_000_000,
digest: 'b48fa42fa5b28c4363747ec0797532e274650f73004383a3054697137d9d1f30',
},
],
});
});
});
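
The spec above exercises the ollama-compatible surface (/api/version, /api/tags, /api/pull, /api/generate, /api/chat, /api/ps). A minimal client sketch (not part of the repository); the port below is a placeholder, since the real one comes from the extension configuration or is picked at random.

async function demoClient(): Promise<void> {
  const base = 'http://localhost:10434'; // placeholder port
  // non-streaming pull: the server answers with a single JSON object
  const pull = await fetch(`${base}/api/pull`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'model-name', stream: false }),
  });
  console.log(await pull.json()); // { status: 'success' } once the model is on disk
  // streaming generate: one JSON object per line
  const generate = await fetch(`${base}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'model-name', prompt: 'what is the question?' }),
  });
  for (const line of (await generate.text()).split('\n').filter(Boolean)) {
    console.log(JSON.parse(line));
  }
}
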
================================================
FILE: packages/backend/src/managers/apiServer.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { Disposable } from '@podman-desktop/api';
import type { NextFunction, Request, Response } from 'express';
import express from 'express';
import type { Server } from 'node:http';
import path, { resolve } from 'node:path';
import http from 'node:http';
import { existsSync } from 'node:fs';
import * as podmanDesktopApi from '@podman-desktop/api';
import { readFile } from 'node:fs/promises';
import type { ModelsManager } from './modelsManager';
import type { components } from '../../src-generated/openapi';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { getFreeRandomPort } from '../utils/ports';
import * as OpenApiValidator from 'express-openapi-validator';
import type { HttpError, OpenApiRequest } from 'express-openapi-validator/dist/framework/types';
import type { CatalogManager } from './catalogManager';
import { isProgressEvent } from '../models/baseEvent';
import type { InferenceManager } from './inference/inferenceManager';
import { withDefaultConfiguration } from '../utils/inferenceUtils';
import type { InferenceServer } from '@shared/models/IInference';
import OpenAI from 'openai';
import type { ChatCompletionMessageParam } from 'openai/resources';
import type { ContainerRegistry } from '../registries/ContainerRegistry';
import type { Stream } from 'openai/streaming';
import crypto from 'node:crypto';
import swaggerUi from 'swagger-ui-express';
import { getAbsoluteFSPath } from 'swagger-ui-dist';
import openAiApi from '../assets/openai.json';
const SHOW_API_ERROR_COMMAND = 'ai-lab.show-api-error';
export const PREFERENCE_RANDOM_PORT = 0;
type ListModelResponse = components['schemas']['ListModelResponse'];
type Message = components['schemas']['Message'];
type ProcessModelResponse = components['schemas']['ProcessModelResponse'];
interface SwaggerRequest extends Request {
swaggerDoc?: { servers: { description: string; url: string }[] };
}
function asListModelResponse(model: ModelInfo): ListModelResponse {
return {
model: model.name,
name: model.name,
digest: toDigest(model.name, model.sha256),
size: model.file?.size,
modified_at: model.file?.creation?.toISOString(),
details: {},
};
}
// ollama expects at least 12 characters for the digest
function toDigest(name: string, sha256?: string): string {
return sha256 ?? crypto.createHash('sha256').update(name).digest('hex');
}
function asProcessModelResponse(model: ModelInfo): ProcessModelResponse {
return {
name: model.name,
model: model.name,
size: model.memory,
digest: toDigest(model.name, model.sha256),
};
}
const LISTENING_ADDRESS = '0.0.0.0';
interface ChatCompletionOptions {
server: InferenceServer;
modelInfo: ModelInfo;
messages: ChatCompletionMessageParam[];
stream: boolean;
onStreamResponse: (response: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>) => Promise<void>;
onNonStreamResponse: (response: OpenAI.Chat.Completions.ChatCompletion) => void;
}
export class ApiServer implements Disposable {
#listener?: Server;
constructor(
private extensionContext: podmanDesktopApi.ExtensionContext,
private modelsManager: ModelsManager,
private catalogManager: CatalogManager,
private inferenceManager: InferenceManager,
private configurationRegistry: ConfigurationRegistry,
private containerRegistry: ContainerRegistry,
) {}
protected getListener(): Server | undefined {
return this.#listener;
}
async init(): Promise<void> {
const app = express();
const router = express.Router();
router.use(express.json());
// validate requests / responses based on openapi spec
router.use(
OpenApiValidator.middleware({
apiSpec: this.getSpecFile(),
validateRequests: true,
validateResponses: {
onError: (error, body, req) => {
console.error(`Response body fails validation: `, error);
console.error(`Emitted from:`, req.originalUrl);
console.error(body);
},
},
}),
);
router.use((err: HttpError, _req: OpenApiRequest, res: Response, _next: NextFunction) => {
// format errors from validator
res.status(err.status || 500).json({
message: err.message,
errors: err.errors,
});
});
// declare routes
router.get('/version', this.getVersion.bind(this));
router.get('/tags', this.getModels.bind(this));
router.post('/pull', this.pullModel.bind(this));
router.post('/show', this.show.bind(this));
router.post('/generate', this.generate.bind(this));
router.post('/chat', this.chat.bind(this));
router.get('/ps', this.ps.bind(this));
app.get('/', (_res, res) => res.sendStatus(200)); //required for the ollama client to work against us
app.use('/api', router);
app.use('/spec', this.getSpec.bind(this));
// get swagger-ui path from dist/swagger-ui
const swaggerPath = resolve(getAbsoluteFSPath(), 'swagger-ui');
app.use(
'/api-docs/:portNumber',
function (req: SwaggerRequest, res: Response, next: NextFunction) {
if (req.path.startsWith('/swagger-ui') && req.path !== '/swagger-ui-init.js') {
// serve the swagger-ui-dist file from swaggerPath
res.sendFile(path.join(swaggerPath, req.path));
return;
}
const copyOpenAiJson = structuredClone(openAiApi);
// Extract the port number from the route parameter and set it as the server URL
const portNumber = req.params.portNumber;
copyOpenAiJson.servers = [{ description: 'AI Lab Inference Server', url: `http://localhost:${portNumber}` }];
req.swaggerDoc = copyOpenAiJson;
next();
},
swaggerUi.serveFiles(openAiApi),
swaggerUi.setup(),
);
const server = http.createServer(app);
let listeningOn = this.configurationRegistry.getExtensionConfiguration().apiPort;
server.on('error', () => {
this.displayApiError(listeningOn);
});
if (listeningOn === PREFERENCE_RANDOM_PORT) {
getFreeRandomPort(LISTENING_ADDRESS)
.then((randomPort: number) => {
listeningOn = randomPort;
this.#listener = server.listen(listeningOn, LISTENING_ADDRESS);
})
.catch((e: unknown) => {
console.error('unable to get a free port for the api server', e);
});
} else {
this.#listener = server.listen(listeningOn, LISTENING_ADDRESS);
}
}
displayApiError(port: number): void {
const apiStatusBarItem = podmanDesktopApi.window.createStatusBarItem();
apiStatusBarItem.text = `AI Lab API listening error`;
apiStatusBarItem.command = SHOW_API_ERROR_COMMAND;
this.extensionContext.subscriptions.push(
podmanDesktopApi.commands.registerCommand(SHOW_API_ERROR_COMMAND, async () => {
const address = `http://localhost:${port}`;
await podmanDesktopApi.window.showErrorMessage(
`AI Lab API failed to listen on\n${address}\nYou can change the port in the Preferences then restart the extension.`,
'OK',
);
}),
apiStatusBarItem,
);
apiStatusBarItem.show();
}
private getFile(filepath: string): string {
// when plugin is installed, the file is placed in the plugin directory (~/.local/share/containers/podman-desktop/plugins//)
const prodFile = path.join(__dirname, filepath);
if (existsSync(prodFile)) {
return prodFile;
}
// return dev file
return path.join(__dirname, '..', '..', filepath);
}
getSpecFile(): string {
return this.getFile('../api/openapi.yaml');
}
getPackageFile(): string {
return this.getFile('../package.json');
}
dispose(): void {
this.#listener?.close();
}
private doErr(res: Response, message: string, err: unknown): void {
res.status(500).json({
message,
errors: [err instanceof Error ? err.message : err],
});
}
getSpec(_req: Request, res: Response): void {
try {
const spec = this.getSpecFile();
readFile(spec, 'utf-8')
.then(content => {
res.status(200).type('application/yaml').send(content);
})
.catch((err: unknown) => this.doErr(res, 'unable to get spec', err));
} catch (err: unknown) {
this.doErr(res, 'unable to get spec', err);
}
}
getVersion(_req: Request, res: Response): void {
try {
const pkg = this.getPackageFile();
readFile(pkg, 'utf-8')
.then(content => {
const json = JSON.parse(content);
res.status(200).json({ version: `v${json.version}` });
})
.catch((err: unknown) => this.doErr(res, 'unable to get version', err));
} catch (err: unknown) {
this.doErr(res, 'unable to get version', err);
}
}
getModels(_req: Request, res: Response): void {
try {
const models = this.modelsManager
.getModelsInfo()
.filter(model => this.modelsManager.isModelOnDisk(model.id))
.map(model => asListModelResponse(model));
res.status(200).json({ models: models });
} catch (err: unknown) {
this.doErr(res, 'unable to get models', err);
}
}
private streamLine(res: Response, obj: unknown): void {
res.write(JSON.stringify(obj) + '\n');
}
private sendResult(res: Response, obj: unknown, code: number, stream: boolean): void {
// eslint-disable-next-line sonarjs/no-selector-parameter
if (stream) {
this.streamLine(res, obj);
} else {
res.status(code).json(obj);
}
}
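// pullModel serves an Ollama-style pull request: the model is resolved in the catalog, the
// download is skipped when the model is already on disk, and progress is otherwise streamed as
// newline-delimited JSON until the download finishes.
// A minimal client sketch (the path, port and model name below are illustrative assumptions,
// not taken from this file):
//
//   const response = await fetch('http://localhost:10434/api/pull', {
//     method: 'POST',
//     body: JSON.stringify({ model: 'granite-7b-lab', stream: true }),
//   });
//   // each line of the response body is a JSON object such as {"status":"pulling manifest"}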
pullModel(req: Request, res: Response): void {
const modelName = req.body['model'] ?? req.body['name'];
let stream: boolean = true;
if ('stream' in req.body) {
stream = req.body['stream'];
}
let modelInfo: ModelInfo;
if (stream) {
this.streamLine(res, { status: 'pulling manifest' });
}
try {
modelInfo = this.catalogManager.getModelByName(modelName);
} catch {
this.sendResult(res, { error: 'pull model manifest: file does not exist' }, 500, stream);
res.end();
return;
}
if (this.modelsManager.isModelOnDisk(modelInfo.id)) {
this.sendResult(
res,
{
status: 'success',
},
200,
stream,
);
res.end();
return;
}
const abortController = new AbortController();
const downloader = this.modelsManager.createDownloader(modelInfo, abortController.signal);
if (stream) {
downloader.onEvent(event => {
if (isProgressEvent(event) && event.id === modelName) {
this.streamLine(res, {
status: `pulling ${modelInfo.sha256}`,
digest: `sha256:${modelInfo.sha256}`,
total: event.total,
completed: Math.round((event.total * event.value) / 100),
});
}
}, this);
}
downloader
.perform(modelName)
.then(async () => {
await this.modelsManager.getLocalModelsFromDisk();
await this.modelsManager.sendModelsInfo();
this.sendResult(
res,
{
status: 'success',
},
200,
stream,
);
})
.catch((err: unknown) => {
this.sendResult(
res,
{
error: String(err),
},
500,
stream,
);
})
.finally(() => {
res.end();
});
}
show(req: Request, res: Response): void {
res.status(200).json({});
res.end();
}
// makeServerAvailable checks if an inference server for the model exists and is started;
// if not, it creates and/or starts it, and waits for the service to be healthy
private async makeServerAvailable(modelInfo: ModelInfo): Promise<InferenceServer> {
let servers = this.inferenceManager.getServers();
let server = servers.find(s => s.models.map(mi => mi.id).includes(modelInfo.id));
if (!server) {
const config = await withDefaultConfiguration({
modelsInfo: [modelInfo],
});
await this.inferenceManager.createInferenceServer(config);
} else if (server.status === 'stopped') {
await this.inferenceManager.startInferenceServer(server.container.containerId);
} else {
return server;
}
servers = this.inferenceManager.getServers();
server = servers.find(s => s.models.map(mi => mi.id).includes(modelInfo.id));
if (!server) {
throw new Error('unable to start inference server');
}
// wait for the container to be healthy
return new Promise(resolve => {
const disposable = this.containerRegistry.onHealthyContainerEvent(event => {
if (event.id !== server.container.containerId) {
return;
}
disposable.dispose();
resolve(server);
});
if (server.status === 'running' && server.health?.Status === 'healthy') {
disposable.dispose();
resolve(server);
}
});
}
// openAIChatCompletions executes a chat completion on an OpenAI compatible API
private async openAIChatCompletions(options: ChatCompletionOptions): Promise<void> {
if (!options.modelInfo.file) {
throw new Error('model info has undefined file.');
}
const client = new OpenAI({
baseURL: `http://localhost:${options.server.connection.port}/v1`,
apiKey: 'dummy',
});
const createOptions = {
messages: options.messages,
model: options.modelInfo.name,
};
// we call `create` with a fixed value of `stream`, so the type of `response` is narrowed to either a `Stream<ChatCompletionChunk>` or a `ChatCompletion`
if (options.stream) {
const response = await client.chat.completions.create({ ...createOptions, stream: options.stream });
await options.onStreamResponse(response);
} else {
const response = await client.chat.completions.create({ ...createOptions, stream: options.stream });
options.onNonStreamResponse(response);
}
}
// checkModelAvailability checks if a model is in the catalog
// AND has been downloaded by the user
private checkModelAvailability(modelName: string): ModelInfo {
let modelInfo: ModelInfo;
try {
modelInfo = this.catalogManager.getModelByName(modelName);
} catch {
throw `chat: model "${modelName}" does not exist`;
}
if (!this.modelsManager.isModelOnDisk(modelInfo.id)) {
throw `chat: model "${modelName}" not found, try pulling it first`;
}
return modelInfo;
}
// generate first starts the inference service if necessary.
// If a prompt is given, it runs a chat completion with a single message and returns the result;
// otherwise it only ensures the inference server is running and reports done_reason 'load'.
generate(req: Request, res: Response): void {
let stream: boolean = true;
if ('stream' in req.body) {
stream = req.body['stream'];
}
const prompt = req.body['prompt'];
const modelName = req.body['model'];
let modelInfo: ModelInfo;
try {
modelInfo = this.checkModelAvailability(modelName);
} catch (error) {
this.sendResult(res, { error }, 500, stream);
res.end();
return;
}
// create/start inference server if necessary
this.makeServerAvailable(modelInfo)
.then(async (server: InferenceServer) => {
if (!prompt) {
this.sendResult(
res,
{
model: modelName,
response: '',
done: true,
done_reason: 'load',
},
200,
stream,
);
res.end();
return;
}
const messages = [
{
content: prompt,
role: 'user',
name: undefined,
} as ChatCompletionMessageParam,
];
await this.openAIChatCompletions({
server,
modelInfo,
messages,
stream,
onStreamResponse: async response => {
for await (const chunk of response) {
res.write(
JSON.stringify({
model: modelName,
response: chunk.choices[0].delta.content ?? '',
done: chunk.choices[0].finish_reason === 'stop',
done_reason: chunk.choices[0].finish_reason === 'stop' ? 'stop' : undefined,
}) + '\n',
);
}
res.end();
},
onNonStreamResponse: response => {
res.status(200).json({
model: modelName,
response: response.choices[0].message.content ?? '',
done: true,
done_reason: 'stop',
});
res.end();
},
});
})
.catch((err: unknown) => console.error(`unable to check if the inference server is running: ${err}`));
}
// chat first starts the service if necessary
// then runs a chat completion and returns the result
chat(req: Request, res: Response): void {
let stream: boolean = true;
if ('stream' in req.body) {
stream = req.body['stream'];
}
const messagesUser: Message[] = req.body['messages'];
const modelName = req.body['model'];
let modelInfo: ModelInfo;
try {
modelInfo = this.checkModelAvailability(modelName);
} catch (error) {
this.sendResult(res, { error }, 500, stream);
res.end();
return;
}
// create/start inference server if necessary
this.makeServerAvailable(modelInfo)
.then(async (server: InferenceServer) => {
const messages = messagesUser.map(
message =>
({
name: undefined,
...message,
}) as ChatCompletionMessageParam,
);
await this.openAIChatCompletions({
server,
modelInfo,
messages,
stream,
onStreamResponse: async response => {
for await (const chunk of response) {
res.write(
JSON.stringify({
model: modelName,
message: {
role: 'assistant',
content: chunk.choices[0].delta.content ?? '',
},
done: chunk.choices[0].finish_reason === 'stop',
done_reason: chunk.choices[0].finish_reason === 'stop' ? 'stop' : undefined,
}) + '\n',
);
}
res.end();
},
onNonStreamResponse: response => {
res.status(200).json({
model: modelName,
message: {
role: 'assistant',
content: response.choices[0].message.content ?? '',
},
done: true,
done_reason: 'stop',
});
res.end();
},
});
})
.catch((err: unknown) => console.error(`unable to check if the inference server is running: ${err}`));
}
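// ps lists the models currently served by running inference servers.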
ps(_req: Request, res: Response): void {
try {
const models = this.inferenceManager
.getServers()
.filter(server => server.status === 'running')
.flatMap(server => server.models)
.map(model => asProcessModelResponse(model));
res.status(200).json({ models });
} catch (err: unknown) {
this.doErr(res, 'unable to ps', err);
}
}
}
================================================
FILE: packages/backend/src/managers/application/applicationManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { ContainerProviderConnection, PodInfo, TelemetryLogger } from '@podman-desktop/api';
import { containerEngine, window } from '@podman-desktop/api';
import type { PodmanConnection } from '../podmanConnection';
import type { CatalogManager } from '../catalogManager';
import type { ModelsManager } from '../modelsManager';
import type { PodManager } from '../recipes/PodManager';
import type { RecipeManager } from '../recipes/RecipeManager';
import { ApplicationManager } from './applicationManager';
import type { Recipe, RecipeImage } from '@shared/models/IRecipe';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { VMType } from '@shared/models/IPodman';
import { POD_LABEL_MODEL_ID, POD_LABEL_RECIPE_ID } from '../../utils/RecipeConstants';
import type { InferenceServer } from '@shared/models/IInference';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import type { LlamaStackManager } from '../llama-stack/llamaStackManager';
import type { ApplicationOptions } from '../../models/ApplicationOptions';
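// The ApplicationManager collaborators are replaced with minimal mocks: only the methods
// exercised by the tests below are stubbed with vi.fn().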
const taskRegistryMock = {
createTask: vi.fn(),
updateTask: vi.fn(),
deleteByLabels: vi.fn(),
} as unknown as TaskRegistry;
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
const podmanConnectionMock = {
onPodmanConnectionEvent: vi.fn(),
} as unknown as PodmanConnection;
const catalogManagerMock = {} as unknown as CatalogManager;
const modelsManagerMock = {
requestDownloadModel: vi.fn(),
uploadModelToPodmanMachine: vi.fn(),
} as unknown as ModelsManager;
const telemetryMock = {
logError: vi.fn(),
logUsage: vi.fn(),
} as unknown as TelemetryLogger;
const podManager = {
onStartPodEvent: vi.fn(),
onRemovePodEvent: vi.fn(),
getPodsWithLabels: vi.fn(),
createPod: vi.fn(),
getPod: vi.fn(),
findPodByLabelsValues: vi.fn(),
startPod: vi.fn(),
stopPod: vi.fn(),
removePod: vi.fn(),
} as unknown as PodManager;
const recipeManager = {
cloneRecipe: vi.fn(),
buildRecipe: vi.fn(),
} as unknown as RecipeManager;
const llamaStackManager = {
getLlamaStackContainers: vi.fn(),
} as unknown as LlamaStackManager;
vi.mock('@podman-desktop/api', () => ({
window: {
withProgress: vi.fn(),
},
ProgressLocation: {
TASK_WIDGET: 'task-widget',
},
provider: {
getContainerConnections: vi.fn(),
},
containerEngine: {
createContainer: vi.fn(),
},
Disposable: {
create: vi.fn(),
},
}));
const recipeMock: Recipe = {
id: 'recipe-test',
name: 'Test Recipe',
categories: [],
description: 'test recipe description',
repository: 'http://test-repository.test',
readme: 'test recipe readme',
};
const remoteModelMock: ModelInfo = {
id: 'model-test',
name: 'Test Model',
description: 'test model description',
url: 'http://test-repository.test',
};
const recipeImageInfoMock: RecipeImage = {
name: 'test recipe image info',
id: 'test-recipe-image-info',
appName: 'test-app-name',
engineId: 'test-engine-id',
ports: [],
modelService: false,
recipeId: recipeMock.id,
};
const connectionMock: ContainerProviderConnection = {
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
} as unknown as ContainerProviderConnection;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
vi.mocked(recipeManager.buildRecipe).mockResolvedValue({ images: [recipeImageInfoMock] });
vi.mocked(podManager.createPod).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id' });
vi.mocked(podManager.getPod).mockResolvedValue({ engineId: 'test-engine-id', Id: 'test-pod-id' } as PodInfo);
vi.mocked(podManager.getPodsWithLabels).mockResolvedValue([]);
vi.mocked(taskRegistryMock.createTask).mockImplementation((name, state, labels) => ({
name,
state,
labels,
id: 'fake-task',
}));
vi.mocked(modelsManagerMock.uploadModelToPodmanMachine).mockResolvedValue('downloaded-model-path');
vi.mocked(llamaStackManager.getLlamaStackContainers).mockResolvedValue({
server: { containerId: 'container1', port: 10001, state: 'running' },
playground: { containerId: 'playground1', port: 10002, state: 'running' },
});
});
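// Helper creating an ApplicationManager wired with the mocked collaborators and calling init()
// so that event subscriptions and the initial refresh are in place, as in production.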
function getInitializedApplicationManager(): ApplicationManager {
const manager = new ApplicationManager(
taskRegistryMock,
rpcExtensionMock,
podmanConnectionMock,
catalogManagerMock,
modelsManagerMock,
telemetryMock,
podManager,
recipeManager,
llamaStackManager,
);
manager.init();
return manager;
}
describe('requestPullApplication', () => {
test('task should be set to error if pull application raise an error', async () => {
vi.mocked(window.withProgress).mockRejectedValue(new Error('pull application error'));
const trackingId = await getInitializedApplicationManager().requestPullApplication({
connection: connectionMock,
recipe: recipeMock,
model: remoteModelMock,
});
// ensure the task is created
await vi.waitFor(() => {
expect(taskRegistryMock.createTask).toHaveBeenCalledWith(`Pulling ${recipeMock.name} recipe`, 'loading', {
trackingId: trackingId,
'recipe-pulling': recipeMock.id,
});
});
// ensure the task is updated
await vi.waitFor(() => {
expect(taskRegistryMock.updateTask).toHaveBeenCalledWith(
expect.objectContaining({
state: 'error',
}),
);
});
});
});
describe('stopApplication', () => {
test('calling stop with exited pod should not create task', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
Status: 'Exited',
} as unknown as PodInfo);
await getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id);
expect(taskRegistryMock.createTask).not.toHaveBeenCalled();
expect(podManager.stopPod).not.toHaveBeenCalled();
});
test('calling stop application with running pod should create stop task ', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
Status: 'Running',
} as unknown as PodInfo);
await getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id);
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Stopping AI App', 'loading', {
'recipe-id': recipeMock.id,
'model-id': remoteModelMock.id,
});
expect(podManager.stopPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing');
});
test('error raised should make the task as failed', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
Status: 'Running',
} as unknown as PodInfo);
vi.mocked(podManager.stopPod).mockRejectedValue(new Error('stop pod error'));
await expect(() => {
return getInitializedApplicationManager().stopApplication(recipeMock.id, remoteModelMock.id);
}).rejects.toThrowError('stop pod error');
expect(taskRegistryMock.updateTask).toHaveBeenCalledWith(
expect.objectContaining({
state: 'error',
}),
);
});
});
describe('startApplication', () => {
test('expect startPod in podManager to be properly called', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
Status: 'Exited',
} as unknown as PodInfo);
await getInitializedApplicationManager().startApplication(recipeMock.id, remoteModelMock.id);
expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing');
});
test('error raised should make the task as failed', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
Status: 'Exited',
} as unknown as PodInfo);
vi.mocked(podManager.startPod).mockRejectedValue(new Error('start pod error'));
await expect(() => {
return getInitializedApplicationManager().startApplication(recipeMock.id, remoteModelMock.id);
}).rejects.toThrowError('start pod error');
expect(taskRegistryMock.updateTask).toHaveBeenCalledWith(
expect.objectContaining({
state: 'error',
}),
);
});
});
describe.each([true, false])('pullApplication, with model is %o', withModel => {
let applicationOptions: ApplicationOptions;
beforeEach(() => {
applicationOptions = withModel
? {
connection: connectionMock,
recipe: recipeMock,
model: remoteModelMock,
}
: {
connection: connectionMock,
recipe: recipeMock,
dependencies: {
llamaStack: true,
},
};
});
test('labels should be propagated', async () => {
await getInitializedApplicationManager().pullApplication(applicationOptions, {
'test-label': 'test-value',
});
// clone the recipe
expect(recipeManager.cloneRecipe).toHaveBeenCalledWith(recipeMock, {
'test-label': 'test-value',
'model-id': withModel ? remoteModelMock.id : '',
});
if (withModel) {
// download model
expect(modelsManagerMock.requestDownloadModel).toHaveBeenCalledWith(remoteModelMock, {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': remoteModelMock.id,
});
// upload model to podman machine
expect(modelsManagerMock.uploadModelToPodmanMachine).toHaveBeenCalledWith(connectionMock, remoteModelMock, {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': remoteModelMock.id,
});
}
// build the recipe
expect(recipeManager.buildRecipe).toHaveBeenCalledWith(
{
connection: connectionMock,
recipe: recipeMock,
model: withModel ? remoteModelMock : undefined,
dependencies: applicationOptions.dependencies,
},
{
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': withModel ? remoteModelMock.id : '',
},
);
// the 'Creating AI App' task must be created
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Creating AI App', 'loading', {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': withModel ? remoteModelMock.id : '',
});
// a pod must have been created
expect(podManager.createPod).toHaveBeenCalledWith({
provider: connectionMock,
name: expect.any(String),
portmappings: [],
labels: {
[POD_LABEL_MODEL_ID]: withModel ? remoteModelMock.id : '',
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
});
expect(containerEngine.createContainer).toHaveBeenCalledWith('test-engine-id', {
Image: recipeImageInfoMock.id,
name: expect.any(String),
Env: withModel ? [] : ['MODEL_ENDPOINT=http://host.containers.internal:10001'],
HealthCheck: undefined,
HostConfig: undefined,
Detach: true,
pod: 'test-pod-id',
start: false,
});
// finally the pod must be started
expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id');
});
test('requestDownloadModel skipped with inference server', async () => {
vi.mocked(recipeManager.buildRecipe).mockResolvedValue({
images: [recipeImageInfoMock],
inferenceServer: {
connection: {
port: 56001,
},
} as InferenceServer,
});
vi.mocked(modelsManagerMock.requestDownloadModel).mockResolvedValue('/path/to/model');
await getInitializedApplicationManager().pullApplication(applicationOptions, {
'test-label': 'test-value',
});
// clone the recipe
expect(recipeManager.cloneRecipe).toHaveBeenCalledWith(recipeMock, {
'test-label': 'test-value',
'model-id': withModel ? remoteModelMock.id : '',
});
if (withModel) {
// download model
expect(modelsManagerMock.requestDownloadModel).toHaveBeenCalledWith(remoteModelMock, {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': remoteModelMock.id,
});
// upload model to podman machine
expect(modelsManagerMock.uploadModelToPodmanMachine).not.toHaveBeenCalled();
}
// build the recipe
expect(recipeManager.buildRecipe).toHaveBeenCalledWith(
{
connection: connectionMock,
recipe: recipeMock,
model: withModel ? remoteModelMock : undefined,
dependencies: applicationOptions.dependencies,
},
{
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': withModel ? remoteModelMock.id : '',
},
);
// the 'Creating AI App' task must be created
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Creating AI App', 'loading', {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
'model-id': withModel ? remoteModelMock.id : '',
});
// a pod must have been created
expect(podManager.createPod).toHaveBeenCalledWith({
provider: connectionMock,
name: expect.any(String),
portmappings: [],
labels: {
[POD_LABEL_MODEL_ID]: withModel ? remoteModelMock.id : '',
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
});
expect(containerEngine.createContainer).toHaveBeenCalledWith('test-engine-id', {
Image: recipeImageInfoMock.id,
name: expect.any(String),
Env: withModel
? ['MODEL_ENDPOINT=http://host.containers.internal:56001']
: ['MODEL_ENDPOINT=http://host.containers.internal:10001'],
HealthCheck: undefined,
HostConfig: undefined,
Detach: true,
pod: 'test-pod-id',
start: false,
});
// finally the pod must be started
expect(podManager.startPod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id');
});
test('existing application should be removed', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue({
engineId: 'test-engine-id',
Id: 'test-pod-id-existing',
Labels: {
[POD_LABEL_MODEL_ID]: remoteModelMock.id,
[POD_LABEL_RECIPE_ID]: recipeMock.id,
},
} as unknown as PodInfo);
await getInitializedApplicationManager().pullApplication(applicationOptions);
// removing existing application should create a task to notify the user
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Removing AI App', 'loading', {
'recipe-id': recipeMock.id,
'model-id': withModel ? remoteModelMock.id : '',
});
// removePod should have been called for the existing pod
expect(podManager.removePod).toHaveBeenCalledWith('test-engine-id', 'test-pod-id-existing');
});
test('qemu connection should have specific flag', async () => {
vi.mocked(podManager.findPodByLabelsValues).mockResolvedValue(undefined);
vi.mocked(recipeManager.buildRecipe).mockResolvedValue({
images: [
recipeImageInfoMock,
{
modelService: true,
ports: ['8888'],
name: 'llamacpp',
id: 'llamacpp',
appName: 'llamacpp',
engineId: recipeImageInfoMock.engineId,
recipeId: recipeMock.id,
},
],
});
await getInitializedApplicationManager().pullApplication(applicationOptions);
// the container should have been created with the expected HostConfig
expect(containerEngine.createContainer).toHaveBeenCalledWith(
recipeImageInfoMock.engineId,
expect.objectContaining({
HostConfig: withModel
? {
Mounts: [
{
Mode: 'Z',
Source: 'downloaded-model-path',
Target: '/downloaded-model-path',
Type: 'bind',
},
],
}
: undefined,
}),
);
});
});
================================================
FILE: packages/backend/src/managers/application/applicationManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RecipeComponents, RecipeImage } from '@shared/models/IRecipe';
import * as path from 'node:path';
import { containerEngine, Disposable, window, ProgressLocation } from '@podman-desktop/api';
import type {
PodCreatePortOptions,
TelemetryLogger,
PodInfo,
HostConfig,
HealthConfig,
PodContainerInfo,
ContainerProviderConnection,
} from '@podman-desktop/api';
import type { ModelsManager } from '../modelsManager';
import { getPortsFromLabel, getPortsInfo } from '../../utils/ports';
import { getDurationSecondsSince, timeout } from '../../utils/utils';
import type { ApplicationState } from '@shared/models/IApplicationState';
import type { PodmanConnection } from '../podmanConnection';
import { MSG_APPLICATIONS_STATE_UPDATE } from '@shared/Messages';
import type { CatalogManager } from '../catalogManager';
import { ApplicationRegistry } from '../../registries/ApplicationRegistry';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import { Publisher } from '../../utils/Publisher';
import { getModelPropertiesForEnvironment } from '../../utils/modelsUtils';
import { getRandomName, getRandomString } from '../../utils/randomUtils';
import type { PodManager } from '../recipes/PodManager';
import { SECOND } from '../../workers/provider/LlamaCppPython';
import type { RecipeManager } from '../recipes/RecipeManager';
import {
POD_LABEL_APP_PORTS,
POD_LABEL_MODEL_ID,
POD_LABEL_MODEL_PORTS,
POD_LABEL_RECIPE_ID,
} from '../../utils/RecipeConstants';
import { VMType } from '@shared/models/IPodman';
import { RECIPE_START_ROUTE } from '../../registries/NavigationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { TaskRunner } from '../TaskRunner';
import { getInferenceType } from '../../utils/inferenceUtils';
import type { LlamaStackManager } from '../llama-stack/llamaStackManager';
import { isApplicationOptionsWithModelInference, type ApplicationOptions } from '../../models/ApplicationOptions';
export class ApplicationManager extends Publisher<ApplicationState[]> implements Disposable {
#applications: ApplicationRegistry<ApplicationState>;
protectTasks: Set<string> = new Set();
#disposables: Disposable[];
#taskRunner: TaskRunner;
constructor(
private taskRegistry: TaskRegistry,
rpcExtension: RpcExtension,
private podmanConnection: PodmanConnection,
private catalogManager: CatalogManager,
private modelsManager: ModelsManager,
private telemetry: TelemetryLogger,
private podManager: PodManager,
private recipeManager: RecipeManager,
private llamaStackManager: LlamaStackManager,
) {
super(rpcExtension, MSG_APPLICATIONS_STATE_UPDATE, () => this.getApplicationsState());
this.#applications = new ApplicationRegistry<ApplicationState>();
this.#taskRunner = new TaskRunner(this.taskRegistry);
this.#disposables = [];
}
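/**
 * Start pulling an application in the background and return a tracking id that can be used to
 * follow the related tasks; failures are reported through the task registry instead of being
 * thrown to the caller.
 */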
async requestPullApplication(options: ApplicationOptions): Promise<string> {
// create a tracking id to put in the labels
const trackingId: string = getRandomString();
const labels: Record<string, string> = {
trackingId: trackingId,
};
this.#taskRunner
.runAsTask(
{
...labels,
'recipe-pulling': options.recipe.id, // this label should only be on the master task
},
{
loadingLabel: `Pulling ${options.recipe.name} recipe`,
errorMsg: err => `Something went wrong while pulling ${options.recipe.name}: ${String(err)}`,
},
() =>
window.withProgress(
{
location: ProgressLocation.TASK_WIDGET,
title: `Pulling ${options.recipe.name}.`,
details: {
routeId: RECIPE_START_ROUTE,
routeArgs: [options.recipe.id, trackingId],
},
},
() => this.pullApplication(options, labels),
),
)
.catch(() => {});
return trackingId;
}
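/**
 * Pull an application synchronously: initialize it (clone, model download/upload, image builds,
 * pod creation) then start the pod, logging telemetry for both success and failure.
 */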
async pullApplication(options: ApplicationOptions, labels: Record<string, string> = {}): Promise<void> {
let modelId: string;
if (isApplicationOptionsWithModelInference(options)) {
modelId = options.model.id;
} else {
modelId = '';
}
// clear any existing status / tasks related to the pair recipeId-modelId.
this.taskRegistry.deleteByLabels({
'recipe-id': options.recipe.id,
'model-id': modelId,
});
const startTime = performance.now();
try {
// init application (git clone, models download etc.)
const podInfo: PodInfo = await this.initApplication(options, labels);
// start the pod
await this.runApplication(podInfo, {
...labels,
'recipe-id': options.recipe.id,
'model-id': modelId,
});
// measure init + start time
const durationSeconds = getDurationSecondsSince(startTime);
this.telemetry.logUsage('recipe.pull', {
'recipe.id': options.recipe.id,
'recipe.name': options.recipe.name,
durationSeconds,
});
} catch (err: unknown) {
const durationSeconds = getDurationSecondsSince(startTime);
this.telemetry.logError('recipe.pull', {
'recipe.id': options.recipe.id,
'recipe.name': options.recipe.name,
durationSeconds,
message: 'error pulling application',
error: err,
});
throw err;
}
}
/**
* This method will execute the following tasks
* - git clone
* - git checkout
* - register local repository
* - download models
* - upload models
* - build containers
* - create pod
*
* @param options the application options (connection, recipe and optional model)
* @param labels
* @private
*/
private async initApplication(options: ApplicationOptions, labels: Record<string, string> = {}): Promise<PodInfo> {
let modelId: string;
if (isApplicationOptionsWithModelInference(options)) {
modelId = options.model.id;
} else {
modelId = '';
}
// clone the recipe
await this.recipeManager.cloneRecipe(options.recipe, { ...labels, 'model-id': modelId });
let modelPath: string | undefined;
if (isApplicationOptionsWithModelInference(options)) {
// get model by downloading it or retrieving locally
modelPath = await this.modelsManager.requestDownloadModel(options.model, {
...labels,
'recipe-id': options.recipe.id,
'model-id': modelId,
});
}
// build all images, one per container (for a basic sample we should have 2 containers = sample app + model service)
const recipeComponents = await this.recipeManager.buildRecipe(options, {
...labels,
'recipe-id': options.recipe.id,
'model-id': modelId,
});
if (isApplicationOptionsWithModelInference(options)) {
// upload model to podman machine if user system is supported
if (!recipeComponents.inferenceServer) {
modelPath = await this.modelsManager.uploadModelToPodmanMachine(options.connection, options.model, {
...labels,
'recipe-id': options.recipe.id,
'model-id': modelId,
});
}
}
// first delete any existing pod with matching labels
if (await this.hasApplicationPod(options.recipe.id, modelId)) {
await this.removeApplication(options.recipe.id, modelId);
}
// create a pod containing all the containers to run the application
return this.createApplicationPod(options, recipeComponents, modelPath, {
...labels,
'recipe-id': options.recipe.id,
'model-id': modelId,
});
}
/**
* Given an ApplicationPodInfo, start the corresponding pod
* @param podInfo
* @param labels
*/
protected async runApplication(podInfo: PodInfo, labels?: { [key: string]: string }): Promise<void> {
await this.#taskRunner.runAsTask(
labels ?? {},
{
loadingLabel: 'Starting AI App',
successLabel: 'AI App is running',
errorMsg: err => String(err),
},
async () => {
await this.podManager.startPod(podInfo.engineId, podInfo.Id);
// check if all containers have started successfully
for (const container of podInfo.Containers ?? []) {
await this.waitContainerIsRunning(podInfo.engineId, container);
}
},
);
return this.checkPodsHealth();
}
protected async waitContainerIsRunning(engineId: string, container: PodContainerInfo): Promise<void> {
const TIME_FRAME_MS = 5000;
const MAX_ATTEMPTS = 60 * (60000 / TIME_FRAME_MS); // try for 1 hour
for (let i = 0; i < MAX_ATTEMPTS; i++) {
const sampleAppContainerInspectInfo = await containerEngine.inspectContainer(engineId, container.Id);
if (sampleAppContainerInspectInfo.State.Running) {
return;
}
await timeout(TIME_FRAME_MS);
}
throw new Error(`Container ${container.Id} not started in time`);
}
protected async createApplicationPod(
options: ApplicationOptions,
components: RecipeComponents,
modelPath: string | undefined,
labels?: { [key: string]: string },
): Promise<PodInfo> {
return this.#taskRunner.runAsTask(
labels ?? {},
{
loadingLabel: 'Creating AI App',
errorMsg: err => `Something went wrong while creating pod: ${String(err)}`,
},
async ({ updateLabels }): Promise<PodInfo> => {
const podInfo = await this.createPod(options, components.images);
updateLabels(labels => ({
...labels,
'pod-id': podInfo.Id,
}));
await this.createContainerAndAttachToPod(options, podInfo, components, modelPath, labels);
return podInfo;
},
);
}
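/**
 * Create one container per recipe image inside the pod: the model-service container gets the
 * model mounted as a bind volume, the other containers get a MODEL_ENDPOINT environment variable
 * pointing at the inference server, local model service or Llama Stack, and every container
 * exposing a port gets a curl-based health check.
 */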
protected async createContainerAndAttachToPod(
options: ApplicationOptions,
podInfo: PodInfo,
components: RecipeComponents,
modelPath: string | undefined,
labels?: { [key: string]: string },
): Promise<void> {
const vmType = options.connection.vmType ?? VMType.UNKNOWN;
// temporary check to set Z flag or not - to be removed when switching to podman 5
await Promise.all(
components.images.map(async image => {
let hostConfig: HostConfig | undefined = undefined;
let envs: string[] = [];
let healthcheck: HealthConfig | undefined = undefined;
// if it's a model service we mount the model as a volume
if (modelPath && isApplicationOptionsWithModelInference(options)) {
if (image.modelService) {
const modelName = path.basename(modelPath);
hostConfig = {
Mounts: [
{
Target: `/${modelName}`,
Source: modelPath,
Type: 'bind',
Mode: vmType === VMType.QEMU ? undefined : 'Z',
},
],
};
envs = [`MODEL_PATH=/${modelName}`];
envs.push(...getModelPropertiesForEnvironment(options.model));
} else if (components.inferenceServer) {
const endPoint = `http://host.containers.internal:${components.inferenceServer.connection.port}`;
envs = [`MODEL_ENDPOINT=${endPoint}`];
} else {
const modelService = components.images.find(image => image.modelService);
if (modelService && modelService.ports.length > 0) {
const endPoint = `http://localhost:${modelService.ports[0]}`;
envs = [`MODEL_ENDPOINT=${endPoint}`];
}
}
} else if (options.dependencies?.llamaStack) {
let stack = await this.llamaStackManager.getLlamaStackContainers();
if (!stack) {
await this.llamaStackManager.createLlamaStackContainers(options.connection, labels ?? {});
stack = await this.llamaStackManager.getLlamaStackContainers();
}
if (stack) {
envs = [`MODEL_ENDPOINT=http://host.containers.internal:${stack.server?.port}`];
}
}
if (image.ports.length > 0) {
healthcheck = {
// must be the port INSIDE the container not the exposed one
Test: ['CMD-SHELL', `curl -s localhost:${image.ports[0]} > /dev/null`],
Interval: SECOND * 5,
Retries: 4 * 5,
Timeout: SECOND * 2,
};
}
const podifiedName = getRandomName(`${image.appName}-podified`);
await containerEngine.createContainer(podInfo.engineId, {
Image: image.id,
name: podifiedName,
Detach: true,
HostConfig: hostConfig,
Env: envs,
start: false,
pod: podInfo.Id,
HealthCheck: healthcheck,
});
}),
);
}
protected async createPod(options: ApplicationOptions, images: RecipeImage[]): Promise<PodInfo> {
// find the exposed port of the sample app so we can open its ports on the new pod
const sampleAppImageInfo = images.find(image => !image.modelService);
if (!sampleAppImageInfo) {
console.error('no sample app image found');
throw new Error('no sample app found');
}
const portmappings: PodCreatePortOptions[] = [];
// we expose all ports so we can check the model service if it is actually running
for (const image of images) {
for (const exposed of image.ports) {
const localPorts = await getPortsInfo(exposed);
if (localPorts) {
portmappings.push({
container_port: parseInt(exposed),
host_port: parseInt(localPorts),
host_ip: '',
protocol: '',
range: 1,
});
}
}
}
// create new pod
const labels: Record<string, string> = {
[POD_LABEL_RECIPE_ID]: options.recipe.id,
};
if (isApplicationOptionsWithModelInference(options)) {
labels[POD_LABEL_MODEL_ID] = options.model.id;
} else {
labels[POD_LABEL_MODEL_ID] = '';
}
// collecting all modelService ports
const modelPorts = images
.filter(img => img.modelService)
.flatMap(img => img.ports)
.map(port => portmappings.find(pm => `${pm.container_port}` === port)?.host_port);
if (modelPorts.length) {
labels[POD_LABEL_MODEL_PORTS] = modelPorts.join(',');
}
// collecting all application ports (excluding service ports)
const appPorts = images
.filter(img => !img.modelService)
.flatMap(img => img.ports)
.map(port => portmappings.find(pm => `${pm.container_port}` === port)?.host_port);
if (appPorts.length) {
labels[POD_LABEL_APP_PORTS] = appPorts.join(',');
}
const { engineId, Id } = await this.podManager.createPod({
provider: options.connection,
name: getRandomName(`pod-${sampleAppImageInfo.appName}`),
portmappings: portmappings,
labels,
});
return this.podManager.getPod(engineId, Id);
}
/**
* Stop the pod with matching recipeId and modelId
* @param recipeId
* @param modelId
*/
async stopApplication(recipeId: string, modelId: string): Promise<PodInfo> {
// clear existing tasks
this.clearTasks(recipeId, modelId);
// get the application pod
const appPod = await this.getApplicationPod(recipeId, modelId);
// if the pod is already stopped skip
if (appPod.Status !== 'Exited') {
await this.#taskRunner.runAsTask(
{
'recipe-id': recipeId,
'model-id': modelId,
},
{
loadingLabel: 'Stopping AI App',
successLabel: 'AI App Stopped',
errorLabel: 'Error stopping AI App',
errorMsg: err => `Error stopping the pod: ${String(err)}`,
},
() => this.podManager.stopPod(appPod.engineId, appPod.Id),
);
await this.checkPodsHealth();
}
return appPod;
}
/**
* Utility method to start a pod using (recipeId, modelId)
* @param recipeId
* @param modelId
*/
async startApplication(recipeId: string, modelId: string): Promise<void> {
this.clearTasks(recipeId, modelId);
const pod = await this.getApplicationPod(recipeId, modelId);
return this.runApplication(pod, {
'recipe-id': recipeId,
'model-id': modelId,
});
}
protected refresh(): void {
// clear existing applications
this.#applications.clear();
// collect all pods based on label
this.podManager
.getPodsWithLabels([POD_LABEL_RECIPE_ID])
.then(pods => {
pods.forEach(pod => this.adoptPod(pod));
})
.catch((err: unknown) => {
console.error('error during adoption of existing AI App pods', err);
});
// notify
this.notify();
}
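/**
 * Subscribe to Podman connection and pod lifecycle events, schedule a recurring health check of
 * the application pods, and perform an initial refresh.
 */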
init(): void {
this.podmanConnection.onPodmanConnectionEvent(() => {
this.refresh();
});
this.podManager.onStartPodEvent((pod: PodInfo) => {
this.adoptPod(pod);
});
this.podManager.onRemovePodEvent(({ podId }) => {
this.forgetPodById(podId);
});
const ticker = (): void => {
this.checkPodsHealth()
.catch((err: unknown) => {
console.error('error getting pods statuses', err);
})
.finally(() => (timerId = setTimeout(ticker, 10000)));
};
// using a recursive setTimeout instead of setInterval as we don't know how long the operation takes
let timerId = setTimeout(ticker, 1000);
this.#disposables.push(
Disposable.create(() => {
clearTimeout(timerId);
}),
);
// refresh on init
this.refresh();
}
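// adoptPod registers an already existing pod carrying AI Lab labels as an application,
// deriving its exposed ports and inference backend from the pod labels.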
protected adoptPod(pod: PodInfo): void {
if (!pod.Labels) {
return;
}
const recipeId = pod.Labels[POD_LABEL_RECIPE_ID];
const modelId = pod.Labels[POD_LABEL_MODEL_ID];
if (!recipeId || !modelId) {
return;
}
const appPorts = getPortsFromLabel(pod.Labels, POD_LABEL_APP_PORTS);
const modelPorts = getPortsFromLabel(pod.Labels, POD_LABEL_MODEL_PORTS);
if (this.#applications.has({ recipeId, modelId })) {
return;
}
const state: ApplicationState = {
recipeId,
modelId,
pod,
appPorts,
modelPorts,
health: 'starting',
backend: getInferenceType(this.modelsManager.getModelsInfo().filter(m => m.id === modelId)),
};
this.updateApplicationState(recipeId, modelId, state);
}
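// forgetPodById drops the application backed by the given pod; unless the removal was initiated
// by AI Lab itself (protectTasks), a task is recorded to tell the user the AI App was stopped
// manually.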
protected forgetPodById(podId: string): void {
const app = Array.from(this.#applications.values()).find(p => p.pod.Id === podId);
if (!app) {
return;
}
if (!app.pod.Labels) {
return;
}
const recipeId = app.pod.Labels[POD_LABEL_RECIPE_ID];
const modelId = app.pod.Labels[POD_LABEL_MODEL_ID];
if (!recipeId || !modelId) {
return;
}
if (!this.#applications.has({ recipeId, modelId })) {
return;
}
this.#applications.delete({ recipeId, modelId });
this.notify();
const protect = this.protectTasks.has(podId);
if (!protect) {
this.taskRegistry.createTask('AI App stopped manually', 'success', {
'recipe-id': recipeId,
'model-id': modelId,
});
} else {
this.protectTasks.delete(podId);
}
}
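// checkPodsHealth refreshes the health and status of every labelled application pod and
// notifies the frontend only when something actually changed.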
protected async checkPodsHealth(): Promise<void> {
const pods = await this.podManager.getPodsWithLabels([POD_LABEL_RECIPE_ID, POD_LABEL_MODEL_ID]);
let changes = false;
for (const pod of pods) {
const recipeId = pod.Labels[POD_LABEL_RECIPE_ID];
const modelId = pod.Labels[POD_LABEL_MODEL_ID];
if (!this.#applications.has({ recipeId, modelId })) {
// a fresh pod might not have been added yet; we will handle it at the next iteration
continue;
}
const podHealth = await this.podManager.getHealth(pod);
const state = this.#applications.get({ recipeId, modelId });
if (state.health !== podHealth) {
state.health = podHealth;
state.pod = pod;
this.#applications.set({ recipeId, modelId }, state);
changes = true;
}
if (pod.Status !== state.pod.Status) {
state.pod = pod;
changes = true;
}
}
if (changes) {
this.notify();
}
}
protected updateApplicationState(recipeId: string, modelId: string, state: ApplicationState): void {
this.#applications.set({ recipeId, modelId }, state);
this.notify();
}
getApplicationsState(): ApplicationState[] {
return Array.from(this.#applications.values());
}
protected clearTasks(recipeId: string, modelId: string): void {
// clear any existing status / tasks related to the pair recipeId-modelId.
this.taskRegistry.deleteByLabels({
'recipe-id': recipeId,
'model-id': modelId,
});
}
/**
* Method that will stop then remove a pod corresponding to the recipe and model provided
* @param recipeId
* @param modelId
*/
async removeApplication(recipeId: string, modelId: string): Promise<void> {
const appPod = await this.stopApplication(recipeId, modelId);
this.protectTasks.add(appPod.Id);
await this.#taskRunner.runAsTask(
{
'recipe-id': recipeId,
'model-id': modelId,
},
{
loadingLabel: 'Removing AI App',
successLabel: 'AI App Removed',
errorLabel: 'Error removing AI App',
errorMsg: () => 'Error removing the pod. Please try to remove the pod manually',
},
() => this.podManager.removePod(appPod.engineId, appPod.Id),
);
}
async restartApplication(connection: ContainerProviderConnection, recipeId: string, modelId: string): Promise<void> {
const appPod = await this.getApplicationPod(recipeId, modelId);
await this.removeApplication(recipeId, modelId);
const recipe = this.catalogManager.getRecipeById(recipeId);
let opts: ApplicationOptions;
if (appPod.Labels[POD_LABEL_MODEL_ID] === '') {
opts = {
connection,
recipe,
};
} else {
const model = this.catalogManager.getModelById(appPod.Labels[POD_LABEL_MODEL_ID]);
opts = {
connection,
recipe,
model,
};
}
// init the recipe
const podInfo = await this.initApplication(opts);
// start the pod
return this.runApplication(podInfo, {
'recipe-id': recipeId,
'model-id': modelId,
});
}
async getApplicationPorts(recipeId: string, modelId: string): Promise<number[]> {
const state = this.#applications.get({ recipeId, modelId });
if (state) {
return state.appPorts;
}
throw new Error(`Recipe ${recipeId} has no ports available`);
}
protected async getApplicationPod(recipeId: string, modelId: string): Promise<PodInfo> {
const appPod = await this.findPod(recipeId, modelId);
if (!appPod) {
throw new Error(`no pod found with recipe Id ${recipeId} and model Id ${modelId}`);
}
return appPod;
}
protected async hasApplicationPod(recipeId: string, modelId: string): Promise<boolean> {
const pod = await this.podManager.findPodByLabelsValues({
[POD_LABEL_RECIPE_ID]: recipeId,
[POD_LABEL_MODEL_ID]: modelId,
});
return !!pod;
}
protected async findPod(recipeId: string, modelId: string): Promise<PodInfo | undefined> {
return this.podManager.findPodByLabelsValues({
[POD_LABEL_RECIPE_ID]: recipeId,
[POD_LABEL_MODEL_ID]: modelId,
});
}
dispose(): void {
this.#disposables.forEach(disposable => disposable.dispose());
}
}
================================================
FILE: packages/backend/src/managers/catalogManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
/* eslint-disable @typescript-eslint/no-explicit-any */
import { afterEach, beforeEach, describe, expect, test, vi } from 'vitest';
import content from '../tests/ai-test.json';
import userContent from '../tests/ai-user-test.json';
import { EventEmitter, window } from '@podman-desktop/api';
import { CatalogManager } from './catalogManager';
import type { Stats } from 'node:fs';
import { promises, existsSync } from 'node:fs';
import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog';
import path from 'node:path';
import { version } from '../assets/ai.json';
import * as catalogUtils from '../utils/catalogUtils';
import type { RpcExtension } from '@shared/messages/MessageProxy';
vi.mock('../assets/ai.json', async importOriginal => {
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
const { version } = await importOriginal<typeof import('../assets/ai.json')>();
return {
default: { ...content, version: version },
version: version,
};
});
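// The default catalog asset is replaced by the test fixture while keeping the real version
// field, so version-compatibility tests still exercise the actual catalog format.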
vi.mock('node:fs');
vi.mock('node:fs/promises');
vi.mock('node:path');
vi.mock('@podman-desktop/api', async () => {
return {
EventEmitter: vi.fn(),
window: {
showNotification: vi.fn(),
},
ProgressLocation: {
TASK_WIDGET: 'TASK_WIDGET',
},
fs: {
createFileSystemWatcher: (): unknown => ({
onDidCreate: vi.fn(),
onDidDelete: vi.fn(),
onDidChange: vi.fn(),
}),
},
};
});
let catalogManager: CatalogManager;
beforeEach(async () => {
vi.resetAllMocks();
// mock EventEmitter logic for all tests
vi.mocked(EventEmitter).mockImplementation(() => {
const listeners: ((value: unknown) => void)[] = [];
return {
event: vi.fn().mockImplementation(callback => {
listeners.push(callback);
}),
fire: vi.fn().mockImplementation((content: unknown) => {
listeners.forEach(listener => listener(content));
}),
} as unknown as EventEmitter<unknown>;
});
const appUserDirectory = '.';
// Creating CatalogManager
catalogManager = new CatalogManager(
{
fire: vi.fn().mockResolvedValue(true),
} as unknown as RpcExtension,
appUserDirectory,
);
});
describe('invalid user catalog', () => {
beforeEach(async () => {
vi.mocked(promises.readFile).mockResolvedValue('invalid json');
await catalogManager.init();
});
test('expect correct model is returned with valid id', () => {
const model = catalogManager.getModelById('llama-2-7b-chat.Q5_K_S');
expect(model).toBeDefined();
expect(model.name).toEqual('Llama-2-7B-Chat-GGUF');
expect(model.registry).toEqual('Hugging Face');
expect(model.url).toEqual(
'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf',
);
});
test('expect error if id does not correspond to any model', () => {
expect(() => catalogManager.getModelById('unknown')).toThrowError('No model found having id unknown');
});
});
test('expect correct model is returned from default catalog with valid id when no user catalog exists', async () => {
vi.mocked(existsSync).mockReturnValue(false);
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getRecipes().length > 0);
const model = catalogManager.getModelById('llama-2-7b-chat.Q5_K_S');
expect(model).toBeDefined();
expect(model.name).toEqual('Llama-2-7B-Chat-GGUF');
expect(model.registry).toEqual('Hugging Face');
expect(model.url).toEqual(
'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf',
);
});
test('expect correct model is returned with valid id when the user catalog is valid', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const model = catalogManager.getModelById('model1');
expect(model).toBeDefined();
expect(model.name).toEqual('Model 1');
expect(model.registry).toEqual('Hugging Face');
expect(model.url).toEqual('https://model1.example.com');
});
test('expect to call writeFile in addLocalModelsToCatalog with catalog updated', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getRecipes().length > 0);
const mtimeDate = new Date('2024-04-03T09:51:15.766Z');
vi.mocked(promises.stat).mockResolvedValue({
size: 1,
mtime: mtimeDate,
} as Stats);
vi.mocked(path.resolve).mockReturnValue('path');
vi.mocked(promises.writeFile).mockResolvedValue();
await catalogManager.importUserModels([
{
name: 'custom-model',
path: '/root/path/file.gguf',
},
]);
expect(promises.mkdir).toHaveBeenCalled();
expect(promises.writeFile).toBeCalledWith('path', expect.any(String), 'utf-8');
});
test('expect to call writeFile in removeLocalModelFromCatalog with catalog updated', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
vi.mocked(path.resolve).mockReturnValue('path');
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getRecipes().length > 0);
vi.mocked(promises.writeFile).mockResolvedValue();
const updatedCatalog: ApplicationCatalog = { ...userContent };
updatedCatalog.models = updatedCatalog.models.filter(m => m.id !== 'model1');
await catalogManager.removeUserModel('model1');
expect(promises.writeFile).toBeCalledWith(
'path',
expect.stringContaining(`"version": "${catalogUtils.CatalogFormat.CURRENT}"`),
'utf-8',
);
});
test('catalog should be the combination of user catalog and default catalog', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
vi.mocked(path.resolve).mockReturnValue('path');
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().length > userContent.models.length);
const mtimeDate = new Date('2024-04-03T09:51:15.766Z');
vi.mocked(promises.stat).mockResolvedValue({
size: 1,
mtime: mtimeDate,
} as Stats);
vi.mocked(path.resolve).mockReturnValue('path');
const catalog = catalogManager.getCatalog();
expect(catalog).toEqual({
version: catalogUtils.CatalogFormat.CURRENT,
recipes: [...content.recipes, ...userContent.recipes],
models: [...content.models, ...userContent.models],
categories: [...content.categories, ...userContent.categories],
});
});
test('catalog should use user items in favour of default', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(path.resolve).mockReturnValue('path');
const overwriteFullCatalog: ApplicationCatalog = {
version: catalogUtils.CatalogFormat.CURRENT,
recipes: content.recipes.map(recipe => ({
...recipe,
name: 'user-recipe-overwrite',
})),
models: content.models.map(model => ({
...model,
name: 'user-model-overwrite',
})),
categories: content.categories.map(category => ({
...category,
name: 'user-model-overwrite',
})),
};
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(overwriteFullCatalog));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().length > 0);
const mtimeDate = new Date('2024-04-03T09:51:15.766Z');
vi.mocked(promises.stat).mockResolvedValue({
size: 1,
mtime: mtimeDate,
} as Stats);
vi.mocked(path.resolve).mockReturnValue('path');
const catalog = catalogManager.getCatalog();
expect(catalog).toEqual(overwriteFullCatalog);
});
test('default catalog should have latest version', () => {
expect(version).toBe(catalogUtils.CatalogFormat.CURRENT);
});
test('wrong catalog version should create a notification', () => {
catalogManager['onUserCatalogUpdate']({ version: catalogUtils.CatalogFormat.UNKNOWN });
expect(window.showNotification).toHaveBeenCalledWith(
expect.objectContaining({
title: 'Incompatible user-catalog',
}),
);
});
test('malformed catalog should create a notification', async () => {
vi.mocked(existsSync).mockReturnValue(false);
vi.mocked(path.resolve).mockReturnValue('path');
catalogManager['onUserCatalogUpdate']({
version: catalogUtils.CatalogFormat.CURRENT,
models: [
{
fakeProperty: 'hello',
},
],
recipes: [],
categories: [],
});
expect(window.showNotification).toHaveBeenCalledWith(
expect.objectContaining({
title: 'Error loading the user catalog',
body: 'Something went wrong while trying to load the user catalog: Error: invalid model format',
}),
);
});
describe('spy on catalogUtils.sanitize', () => {
beforeEach(() => {
// do not mock the complete catalogUtils module but only spy the `sanitize` function,
// as we want to keep the original `catalogUtils.hasCatalogWrongFormat` function
vi.spyOn(catalogUtils, 'sanitize');
});
afterEach(() => {
vi.mocked(catalogUtils.sanitize).mockRestore();
});
test('catalog with undefined version should call sanitize function to try converting it', () => {
vi.mocked(promises.writeFile).mockResolvedValue();
catalogManager['onUserCatalogUpdate']({
recipes: [
{
id: 'chatbot',
description: 'This is a Streamlit chat demo application.',
name: 'ChatBot',
repository: 'https://github.com/containers/ai-lab-recipes',
ref: 'v1.1.3',
icon: 'natural-language-processing',
categories: ['natural-language-processing'],
basedir: 'recipes/natural_language_processing/chatbot',
readme: '',
models: ['hf.instructlab.granite-7b-lab-GGUF', 'hf.instructlab.merlinite-7b-lab-GGUF'],
},
],
models: [],
});
expect(catalogUtils.sanitize).toHaveBeenCalled();
expect(promises.writeFile).toHaveBeenCalled();
});
});
test('filter recipes by language', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const result1 = catalogManager.filterRecipes({
languages: ['lang1'],
});
expect(result1.result.map(r => r.id)).toEqual(['recipe1']);
expect(result1.choices).toEqual({
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 2 },
{ name: 'lang11', count: 1 },
{ name: 'lang2', count: 1 },
{ name: 'lang3', count: 1 },
],
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 1 },
],
tools: [{ name: 'tool1', count: 1 }],
});
const result2 = catalogManager.filterRecipes({
languages: ['lang2'],
});
expect(result2.result.map(r => r.id)).toEqual(['recipe2']);
expect(result2.choices).toEqual({
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 2 },
{ name: 'lang11', count: 1 },
{ name: 'lang2', count: 1 },
{ name: 'lang3', count: 1 },
],
frameworks: [
{ name: 'fw10', count: 1 },
{ name: 'fw2', count: 1 },
],
tools: [{ name: 'tool2', count: 1 }],
});
});
test('filter recipes by tool', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const result1 = catalogManager.filterRecipes({
tools: ['tool1'],
});
expect(result1.result.map(r => r.id)).toEqual(['recipe1']);
expect(result1.choices).toEqual({
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 1 },
],
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 1 },
],
tools: [
{ name: 'tool1', count: 1 },
{ name: 'tool2', count: 1 },
{ name: 'tool3', count: 1 },
],
});
const result2 = catalogManager.filterRecipes({
tools: ['tool2'],
});
expect(result2.result.map(r => r.id)).toEqual(['recipe2']);
expect(result2.choices).toEqual({
frameworks: [
{ name: 'fw10', count: 1 },
{ name: 'fw2', count: 1 },
],
languages: [
{ name: 'lang10', count: 1 },
{ name: 'lang2', count: 1 },
],
tools: [
{ name: 'tool1', count: 1 },
{ name: 'tool2', count: 1 },
{ name: 'tool3', count: 1 },
],
});
const result3 = catalogManager.filterRecipes({
tools: ['tool1', 'tool2'],
});
expect(result3.result.map(r => r.id)).toEqual(['recipe1', 'recipe2']);
expect(result3.choices).toEqual({
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 2 },
{ name: 'fw2', count: 1 },
],
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 2 },
{ name: 'lang2', count: 1 },
],
tools: [
{ name: 'tool1', count: 1 },
{ name: 'tool2', count: 1 },
{ name: 'tool3', count: 1 },
],
});
});
test('filter recipes by framework', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const result1 = catalogManager.filterRecipes({
frameworks: ['fw1'],
});
expect(result1.result.map(r => r.id)).toEqual(['recipe1']);
expect(result1.choices).toEqual({
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 1 },
],
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 3 },
{ name: 'fw11', count: 1 },
{ name: 'fw2', count: 2 },
],
tools: [{ name: 'tool1', count: 1 }],
});
const result2 = catalogManager.filterRecipes({
frameworks: ['fw2'],
});
expect(result2.result.map(r => r.id)).toEqual(['recipe2', 'recipe3']);
expect(result2.choices).toEqual({
languages: [
{ name: 'lang10', count: 1 },
{ name: 'lang11', count: 1 },
{ name: 'lang2', count: 1 },
{ name: 'lang3', count: 1 },
],
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 3 },
{ name: 'fw11', count: 1 },
{ name: 'fw2', count: 2 },
],
tools: [
{ name: 'tool2', count: 1 },
{ name: 'tool3', count: 1 },
],
});
const result3 = catalogManager.filterRecipes({
frameworks: ['fw1', 'fw2'],
});
expect(result3.result.map(r => r.id)).toEqual(['recipe1', 'recipe2', 'recipe3']);
expect(result3.choices).toEqual({
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 2 },
{ name: 'lang11', count: 1 },
{ name: 'lang2', count: 1 },
{ name: 'lang3', count: 1 },
],
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 3 },
{ name: 'fw11', count: 1 },
{ name: 'fw2', count: 2 },
],
tools: [
{ name: 'tool1', count: 1 },
{ name: 'tool2', count: 1 },
{ name: 'tool3', count: 1 },
],
});
});
test('filter recipes by language and framework', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const result1 = catalogManager.filterRecipes({
languages: ['lang2'],
frameworks: ['fw2'],
});
expect(result1.result.map(r => r.id)).toEqual(['recipe2']);
expect(result1.choices).toEqual({
languages: [
{ name: 'lang10', count: 1 },
{ name: 'lang11', count: 1 },
{ name: 'lang2', count: 1 },
{ name: 'lang3', count: 1 },
],
frameworks: [
{ name: 'fw10', count: 1 },
{ name: 'fw2', count: 1 },
],
tools: [{ name: 'tool2', count: 1 }],
});
});
test('filter recipes by language, tool and framework', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
await catalogManager.init();
await vi.waitUntil(() => catalogManager.getModels().some(model => model.id === 'model1'));
const result1 = catalogManager.filterRecipes({
languages: ['lang1'],
tools: ['tool1'],
frameworks: ['fw1'],
});
expect(result1.result.map(r => r.id)).toEqual(['recipe1']);
expect(result1.choices).toEqual({
languages: [
{ name: 'lang1', count: 1 },
{ name: 'lang10', count: 1 },
],
frameworks: [
{ name: 'fw1', count: 1 },
{ name: 'fw10', count: 1 },
],
tools: [{ name: 'tool1', count: 1 }],
});
});
test('models are loaded as soon as init is finished when no user catalog', async () => {
await catalogManager.init();
expect(catalogManager.getModels()).toHaveLength(3);
});
test('models are loaded as soon as init is finished when user catalog exists', async () => {
vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(userContent));
vi.mocked(existsSync).mockReturnValue(true);
await catalogManager.init();
expect(catalogManager.getModels()).toHaveLength(5);
});
================================================
FILE: packages/backend/src/managers/catalogManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { ApplicationCatalog } from '@shared/models/IApplicationCatalog';
import fs, { promises } from 'node:fs';
import path from 'node:path';
import crypto from 'node:crypto';
import defaultCatalog from '../assets/ai.json';
import type { Recipe } from '@shared/models/IRecipe';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { MSG_NEW_CATALOG_STATE } from '@shared/Messages';
import { type Disposable, type Event, EventEmitter, window } from '@podman-desktop/api';
import { JsonWatcher } from '../utils/JsonWatcher';
import { Publisher } from '../utils/Publisher';
import type { LocalModelImportInfo } from '@shared/models/ILocalModelInfo';
import { InferenceType } from '@shared/models/IInference';
import { CatalogFormat, hasCatalogWrongFormat, merge, sanitize } from '../utils/catalogUtils';
import type { FilterRecipesResult, RecipeChoices, RecipeFilters } from '@shared/models/FilterRecipesResult';
import type { RpcExtension } from '@shared/messages/MessageProxy';
export const USER_CATALOG = 'user-catalog.json';
export class CatalogManager extends Publisher<ApplicationCatalog> implements Disposable {
private readonly _onUpdate = new EventEmitter<ApplicationCatalog>();
readonly onUpdate: Event<ApplicationCatalog> = this._onUpdate.event;
private catalog: ApplicationCatalog;
#jsonWatcher: JsonWatcher<ApplicationCatalog> | undefined;
#notification: Disposable | undefined;
constructor(
rpcExtension: RpcExtension,
private appUserDirectory: string,
) {
super(rpcExtension, MSG_NEW_CATALOG_STATE, () => this.getCatalog());
// We start with an empty catalog, for the methods to work before the catalog is loaded
this.catalog = {
version: CatalogFormat.CURRENT,
categories: [],
models: [],
recipes: [],
};
}
/**
* The init method starts a watcher on the user catalog file (user-catalog.json).
*/
async init(): Promise<void> {
return new Promise(resolve => {
// Creating a json watcher
this.#jsonWatcher = new JsonWatcher<ApplicationCatalog>(this.getUserCatalogPath(), {
version: CatalogFormat.CURRENT,
recipes: [],
models: [],
categories: [],
});
this.#jsonWatcher.onContentUpdated(content => {
this.onUserCatalogUpdate(content);
resolve();
});
this.#jsonWatcher.init();
});
}
private loadDefaultCatalog(): void {
this.catalog = defaultCatalog as ApplicationCatalog;
this.notify();
}
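/**
* Called whenever the user catalog file changes: legacy formats are sanitized and written back to disk,
* catalogs with an incompatible version fall back to the default catalog with an error notification,
* and valid user catalogs are merged on top of the default catalog.
*/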
private onUserCatalogUpdate(content: unknown): void {
// if there is no version in the user catalog, we try to sanitize it
// most likely it can be converted automatically to the current version without showing any notification to the user
if (content && typeof content === 'object' && hasCatalogWrongFormat(content)) {
try {
content = sanitize(content);
// overwrite the catalog on disk
const userCatalogPath = this.getUserCatalogPath();
promises.writeFile(userCatalogPath, JSON.stringify(content, undefined, 2), 'utf-8').catch((err: unknown) => {
console.error('Something went wrong while trying to save catalog', err);
});
} catch (e) {
console.error(e);
}
}
if (!content || typeof content !== 'object') {
this.loadDefaultCatalog();
return;
}
// Get the user-catalog version
let userCatalogFormat: string = CatalogFormat.UNKNOWN;
if ('version' in content && typeof content.version === 'string') {
userCatalogFormat = content.version;
}
if (userCatalogFormat !== CatalogFormat.CURRENT) {
this.loadDefaultCatalog();
if (!this.#notification) {
this.#notification = window.showNotification({
type: 'error',
title: 'Incompatible user-catalog',
body: `The catalog is using an older version of the catalog incompatible with current version ${CatalogFormat.CURRENT}.`,
markdownActions:
':button[See migration guide]{href=https://github.com/containers/podman-desktop-extension-ai-lab/blob/main/MIGRATION.md title="Migration guide"}',
});
}
console.error(
`the user-catalog provided is using version ${userCatalogFormat} expected ${CatalogFormat.CURRENT}. You can follow the migration guide.`,
);
return;
}
// merging default catalog with user catalog
try {
this.catalog = merge(sanitize(defaultCatalog), sanitize({ ...content, version: userCatalogFormat }));
// reset notification if everything went smoothly
this.#notification?.dispose();
this.#notification = undefined;
} catch (err: unknown) {
if (!this.#notification) {
this.#notification = window.showNotification({
type: 'error',
title: 'Error loading the user catalog',
body: `Something went wrong while trying to load the user catalog: ${String(err)}`,
});
}
console.error(err);
this.loadDefaultCatalog();
}
this.notify();
}
override notify(): void {
super.notify();
this._onUpdate.fire(this.getCatalog());
}
dispose(): void {
this.#jsonWatcher?.dispose();
this.#notification?.dispose();
}
public getCatalog(): ApplicationCatalog {
return this.catalog;
}
public getModels(): ModelInfo[] {
return this.catalog.models;
}
public getModelById(modelId: string): ModelInfo {
const model = this.getModels().find(m => modelId === m.id);
if (!model) {
throw new Error(`No model found having id ${modelId}`);
}
return model;
}
public getModelByName(modelName: string): ModelInfo {
const model = this.getModels().find(m => modelName === m.name);
if (!model) {
throw new Error(`No model found having name ${modelName}`);
}
return model;
}
public getRecipes(): Recipe[] {
return this.catalog.recipes;
}
public getRecipeById(recipeId: string): Recipe {
const recipe = this.getRecipes().find(r => recipeId === r.id);
if (!recipe) {
throw new Error(`No recipe found having id ${recipeId}`);
}
return recipe;
}
/**
* This method is used to import the user's local models into the user catalog.
* @param localModels the models to import
*/
async importUserModels(localModels: LocalModelImportInfo[]): Promise<void> {
const userCatalogPath = this.getUserCatalogPath();
let content: ApplicationCatalog;
// check if we already have an existing user's catalog
if (fs.existsSync(userCatalogPath)) {
const raw = await promises.readFile(userCatalogPath, 'utf-8');
content = sanitize(JSON.parse(raw));
} else {
content = {
version: CatalogFormat.CURRENT,
recipes: [],
models: [],
categories: [],
};
}
// Transform local models into ModelInfo
const models: ModelInfo[] = await Promise.all(
localModels.map(async local => {
const statFile = await promises.stat(local.path);
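// The model id is a sha256 digest of the file path (not of the file content).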
const sha256 = crypto.createHash('sha256').update(local.path).digest('hex');
return {
id: sha256,
name: local.name,
description: `Model imported from ${local.path}`,
file: {
path: path.dirname(local.path),
file: path.basename(local.path),
size: statFile.size,
creation: statFile.mtime,
},
memory: statFile.size,
backend: local.backend ?? InferenceType.NONE,
};
}),
);
// Add all our models infos to the user's models catalog
content.models.push(...models);
// ensure parent directory exists
await promises.mkdir(path.dirname(userCatalogPath), { recursive: true });
// overwrite the existing catalog
return promises.writeFile(userCatalogPath, JSON.stringify(content, undefined, 2), 'utf-8');
}
/**
* Remove a model from the user's catalog.
* @param modelId
*/
async removeUserModel(modelId: string): Promise<void> {
const userCatalogPath = this.getUserCatalogPath();
if (!fs.existsSync(userCatalogPath)) {
throw new Error('User catalog does not exist.');
}
const raw = await promises.readFile(userCatalogPath, 'utf-8');
const content = sanitize(JSON.parse(raw));
return promises.writeFile(
userCatalogPath,
JSON.stringify(
{
version: content.version,
recipes: content.recipes,
models: content.models.filter(model => model.id !== modelId),
categories: content.categories,
},
undefined,
2,
),
'utf-8',
);
}
/**
* Return the path to the user catalog
*/
private getUserCatalogPath(): string {
return path.resolve(this.appUserDirectory, USER_CATALOG);
}
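/**
* Filter the catalog recipes against the provided filters and compute the remaining facet choices.
* Within a dimension (languages, tools, frameworks) values are OR-ed; across dimensions they are AND-ed,
* e.g. filterRecipes({ languages: ['lang1'], tools: ['tool1'] }) keeps recipes written in lang1 whose backend is tool1.
*/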
public filterRecipes(filters: RecipeFilters): FilterRecipesResult {
let result = this.getRecipes();
for (const [filter, values] of Object.entries(filters)) {
switch (filter) {
case 'languages': {
let res: Recipe[] = [];
for (const value of values) {
res = [...res, ...result.filter(r => r.languages?.includes(value))];
}
result = res;
break;
}
case 'tools':
result = result.filter(r => values.includes(r.backend ?? ''));
break;
case 'frameworks': {
let res: Recipe[] = [];
for (const value of values) {
res = [...res, ...result.filter(r => r.frameworks?.includes(value))];
}
result = res;
break;
}
}
}
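// Compute the facet choices: for a dimension that is actively filtered, re-run the filter without that
// dimension so every option of it stays visible; otherwise derive the options and counts from the
// already filtered result.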
const choices: RecipeChoices = {};
if ('languages' in filters) {
const subfilters = structuredClone(filters);
delete subfilters.languages;
choices.languages = this.filterRecipes(subfilters).choices.languages;
} else {
choices.languages = result
.flatMap(r => r.languages)
.filter(l => l !== undefined)
.filter((value, index, array) => array.indexOf(value) === index)
.sort((a, b) => a.localeCompare(b))
.map(l => ({
name: l,
count: result.filter(r => r.languages?.includes(l)).length,
}));
}
if ('tools' in filters) {
const subfilters = structuredClone(filters);
delete subfilters.tools;
choices.tools = this.filterRecipes(subfilters).choices.tools;
} else {
choices.tools = result
.map(r => r.backend)
.filter(b => b !== undefined)
.filter((value, index, array) => array.indexOf(value) === index)
.sort((a, b) => a.localeCompare(b))
.map(t => ({
name: t,
count: result.filter(r => r.backend === t).length,
}));
}
if ('frameworks' in filters) {
const subfilters = structuredClone(filters);
delete subfilters.frameworks;
choices.frameworks = this.filterRecipes(subfilters).choices.frameworks;
} else {
choices.frameworks = result
.flatMap(r => r.frameworks)
.filter(f => f !== undefined)
.filter((value, index, array) => array.indexOf(value) === index)
.sort((a, b) => a.localeCompare(b))
.map(f => ({
name: f,
count: result.filter(r => r.frameworks?.includes(f)).length,
}));
}
return {
filters,
choices,
result,
};
}
}
================================================
FILE: packages/backend/src/managers/gitManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { describe, expect, test, vi, beforeEach } from 'vitest';
import { GitManager } from './gitManager';
import { statSync, existsSync, mkdirSync, type Stats, rmSync } from 'node:fs';
import { window } from '@podman-desktop/api';
import type { ReadCommitResult } from 'isomorphic-git';
import git from 'isomorphic-git';
vi.mock('isomorphic-git', () => {
return {
default: {
clone: vi.fn(),
currentBranch: vi.fn(),
log: vi.fn(),
resolveRef: vi.fn(),
fetch: vi.fn(),
getConfig: vi.fn(),
statusMatrix: vi.fn(),
},
};
});
vi.mock('node:fs');
vi.mock('@podman-desktop/api', async () => {
return {
window: {
showWarningMessage: vi.fn(),
},
};
});
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(git.resolveRef).mockResolvedValue('dummyCommit');
});
describe('cloneRepository', () => {
const gitmanager = new GitManager();
test('clone and checkout if ref is specified', async () => {
await gitmanager.cloneRepository({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(git.clone).toBeCalledWith({
fs: expect.anything(),
http: expect.anything(),
url: 'repo',
dir: 'target',
ref: '000',
singleBranch: true,
depth: 1,
});
});
test('clone and checkout if ref is NOT specified', async () => {
await gitmanager.cloneRepository({
repository: 'repo',
targetDirectory: 'target',
});
expect(git.clone).toBeCalledWith({
fs: expect.anything(),
http: expect.anything(),
url: 'repo',
dir: 'target',
ref: undefined,
singleBranch: true,
depth: 1,
});
});
});
describe('processCheckout', () => {
test('first install no existing folder', async () => {
vi.mocked(existsSync).mockReturnValue(false);
await new GitManager().processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(existsSync).toHaveBeenCalledWith('target');
expect(mkdirSync).toHaveBeenCalledWith('target', { recursive: true });
expect(git.clone).toBeCalledWith({
fs: expect.anything(),
http: expect.anything(),
url: 'repo',
dir: 'target',
ref: '000',
singleBranch: true,
depth: 1,
});
});
test('existing folder valid', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: true });
await gitmanager.processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(gitmanager.isRepositoryUpToDate).toHaveBeenCalled();
expect(existsSync).toHaveBeenCalledWith('target');
expect(statSync).toHaveBeenCalledWith('target');
expect(mkdirSync).not.toHaveBeenCalled();
expect(git.clone).not.toHaveBeenCalled();
});
test('existing folder detached and user cancel', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(window.showWarningMessage).mockResolvedValue('Cancel');
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
await expect(
gitmanager.processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
}),
).rejects.toThrowError('Cancelled');
});
test('existing folder not-updatable and user continue', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(window.showWarningMessage).mockResolvedValue('Continue');
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
await gitmanager.processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(rmSync).not.toHaveBeenCalled();
expect(mkdirSync).not.toHaveBeenCalled();
expect(git.clone).not.toHaveBeenCalled();
});
test('existing folder not-updatable and user reset', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(window.showWarningMessage).mockResolvedValue('Reset');
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: false });
await gitmanager.processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(window.showWarningMessage).toHaveBeenCalledWith(expect.anything(), 'Cancel', 'Continue', 'Reset');
expect(rmSync).toHaveBeenCalledWith('target', { recursive: true });
});
test('existing folder updatable and user update', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(window.showWarningMessage).mockResolvedValue('Update');
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'isRepositoryUpToDate').mockResolvedValue({ ok: false, updatable: true });
vi.spyOn(gitmanager, 'pull').mockResolvedValue(undefined);
await gitmanager.processCheckout({
repository: 'repo',
targetDirectory: 'target',
ref: '000',
});
expect(window.showWarningMessage).toHaveBeenCalledWith(expect.anything(), 'Cancel', 'Continue', 'Update');
expect(rmSync).not.toHaveBeenCalled();
expect(gitmanager.pull).toHaveBeenCalled();
});
});
describe('isRepositoryUpToDate', () => {
test('no remote defined', async () => {
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'other-repo',
},
]);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo');
expect(result.ok).toBeFalsy();
expect(result.error).toBe(
'The local repository does not have remote repo configured. Remotes: origin other-repo (fetch)',
);
});
test('detached invalid without ref', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.mocked(git.currentBranch).mockResolvedValue(undefined);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository is detached.');
});
test('detached invalid with invalid ref', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue(undefined); // ref is not a tag
vi.mocked(git.currentBranch).mockResolvedValue(undefined);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'invalidRef');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository is detached. HEAD is dummyCommit expected invalidRef.');
});
test('detached invalid with expected ref', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.mocked(git.statusMatrix).mockResolvedValue([['a', 1, 1, 1]]);
vi.mocked(git.currentBranch).mockResolvedValue(undefined);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
expect(result.ok).toBeTruthy();
expect(result.error).toBeUndefined();
});
test('detached with expected ref and modified files', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.mocked(git.statusMatrix).mockResolvedValue([
['a', 1, 1, 1],
['a_file', 1, 2, 1],
]);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has modified files.');
});
test('detached with expected ref and deleted files', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.mocked(git.statusMatrix).mockResolvedValue([
['a', 1, 1, 1],
['a_file', 1, 0, 1],
]);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has deleted files.');
});
test('detached with expected ref and created files', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.mocked(git.statusMatrix).mockResolvedValue([
['a', 1, 1, 1],
['a_file', 0, 2, 2],
]);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has created files.');
});
test('detached with expected ref and repository is not clean', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: false,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'dummyCommit');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository is not clean.');
});
test('using main branch and no local change', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeTruthy();
});
test('using main branch and tracking wrong branch', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/other-branch');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe(
'The local repository is not tracking the right branch. (tracking origin/other-branch when expected main)',
);
});
test('using main branch and ahead', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 1, ahead: 2 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has 2 commit(s) ahead.');
});
test('using main branch and behind', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 1, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeTruthy();
expect(result.updatable).toBeTruthy();
});
test('using main branch and modified files', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: ['a_modified_file.txt'],
created: [],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has modified files.');
});
test('using main branch and deleted files', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: ['a_deleted_file.txt'],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has deleted files.');
});
test('using main branch and created files', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: ['a_created_file.txt'],
deleted: [],
clean: true,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository has created files.');
});
test('using main branch and repository is not clean', async () => {
const gitmanager = new GitManager();
vi.mocked(git.currentBranch).mockResolvedValue('main');
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
vi.spyOn(gitmanager, 'getBehindAhead').mockResolvedValue({ behind: 0, ahead: 0 });
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: false,
});
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'main');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository is not clean.');
});
});
test('using tag and no local change', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue('dummyCommit'); // ref is a tag and points to commit
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
vi.mocked(git.currentBranch).mockResolvedValue(undefined);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'v1.0.0');
expect(result.ok).toBeTruthy();
});
test('using wrong tag', async () => {
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
const gitmanager = new GitManager();
vi.spyOn(gitmanager, 'getRepositoryRemotes').mockResolvedValue([
{
remote: 'origin',
url: 'repo',
},
]);
vi.spyOn(gitmanager, 'getTagCommitId').mockResolvedValue('otherCommit'); // ref is a tag and points to commit
vi.spyOn(gitmanager, 'getRepositoryStatus').mockResolvedValue({
modified: [],
created: [],
deleted: [],
clean: true,
});
vi.mocked(git.currentBranch).mockResolvedValue(undefined);
const result = await gitmanager.isRepositoryUpToDate('target', 'repo', 'v1.0.0');
expect(result.ok).toBeFalsy();
expect(result.error).toBe('The local repository is detached. HEAD is dummyCommit expected otherCommit.');
});
test('getBehindAhead', async () => {
const gitmanager = new GitManager();
vi.mocked(git.log).mockImplementation(async ({ ref }: { ref?: string }) => {
return new Promise(resolve => {
if (ref === 'main') {
resolve([
{
oid: '1',
},
{
oid: '6',
},
{
oid: '2',
},
{
oid: '3',
},
] as ReadCommitResult[]);
} else if (ref === 'origin/main') {
resolve([
{
oid: '1',
},
{
oid: '4',
},
{
oid: '2',
},
{
oid: '5',
},
{
oid: '3',
},
] as ReadCommitResult[]);
} else {
resolve([]);
}
});
});
vi.spyOn(gitmanager, 'getTrackingBranch').mockResolvedValue('origin/main');
const { behind, ahead } = await gitmanager.getBehindAhead('path/to/repo', 'main');
expect(behind).toEqual(2);
expect(ahead).toEqual(1);
});
test('getTrackingBranch', async () => {
const gitmanager = new GitManager();
vi.mocked(git.getConfig).mockImplementation(async ({ path }: { path: string }): Promise<string> => {
if (path === 'branch.my-branch.remote') {
return 'origin';
} else if (path === 'branch.my-branch.merge') {
return 'refs/heads/my-remote-branch';
}
throw new Error('should never been reached');
});
const result = await gitmanager.getTrackingBranch('path/to/repository', 'my-branch');
expect(result).toEqual('origin/my-remote-branch');
});
================================================
FILE: packages/backend/src/managers/gitManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { window } from '@podman-desktop/api';
import fs, { statSync, existsSync, mkdirSync, rmSync } from 'node:fs';
import git from 'isomorphic-git';
import http from 'isomorphic-git/http/node';
export interface GitCloneInfo {
repository: string;
ref?: string;
targetDirectory: string;
}
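/**
* GitManager wraps isomorphic-git to clone recipe repositories, inspect their working-tree status,
* and keep an existing clone in sync with the expected ref.
*/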
export class GitManager {
async cloneRepository(gitCloneInfo: GitCloneInfo): Promise<void> {
// clone repo
await git.clone({
fs,
http,
dir: gitCloneInfo.targetDirectory,
url: gitCloneInfo.repository,
ref: gitCloneInfo.ref,
singleBranch: true,
depth: 1,
});
}
async getRepositoryRemotes(directory: string): Promise<
{
remote: string;
url: string;
}[]
> {
return git.listRemotes({ fs, dir: directory });
}
/* see https://isomorphic-git.org/docs/en/statusMatrix
*
* - The HEAD status is either absent (0) or present (1).
* - The WORKDIR status is either absent (0), identical to HEAD (1), or different from HEAD (2).
* - The STAGE status is either absent (0), identical to HEAD (1), identical to WORKDIR (2), or different from WORKDIR (3).
*
* // example StatusMatrix
* [
* ["a.txt", 0, 2, 0], // new, untracked
* ["b.txt", 0, 2, 2], // added, staged
* ["c.txt", 0, 2, 3], // added, staged, with unstaged changes
* ["d.txt", 1, 1, 1], // unmodified
* ["e.txt", 1, 2, 1], // modified, unstaged
* ["f.txt", 1, 2, 2], // modified, staged
* ["g.txt", 1, 2, 3], // modified, staged, with unstaged changes
* ["h.txt", 1, 0, 1], // deleted, unstaged
* ["i.txt", 1, 0, 0], // deleted, staged
* ["j.txt", 1, 2, 0], // deleted, staged, with unstaged-modified changes (new file of the same name)
* ["k.txt", 1, 1, 0], // deleted, staged, with unstaged changes (new file of the same name)
* ]
*/
async getRepositoryStatus(directory: string): Promise<{
modified: string[];
created: string[];
deleted: string[];
clean: boolean;
}> {
const status = await git.statusMatrix({
fs,
dir: directory,
});
const FILE = 0,
HEAD = 1,
WORKDIR = 2,
STAGE = 3;
const created = status.filter(row => row[HEAD] === 0 && row[WORKDIR] === 2).map(row => row[FILE]);
const deleted = status
.filter(row => row[HEAD] === 1 && (row[WORKDIR] === 0 || row[STAGE] === 0))
.map(row => row[FILE]);
const modified = status.filter(row => row[HEAD] === 1 && row[WORKDIR] === 2).map(row => row[FILE]);
const notClean = status.filter(row => row[HEAD] !== 1 || row[WORKDIR] !== 1 || row[STAGE] !== 1);
return {
modified,
created,
deleted,
clean: notClean.length === 0,
};
}
async getCurrentCommit(directory: string): Promise<string> {
return git.resolveRef({ fs, dir: directory, ref: 'HEAD' });
}
async pull(directory: string): Promise<void> {
return git.pull({
fs,
http,
dir: directory,
});
}
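/**
* Ensure the repository is available in the target directory: an existing, up-to-date clone is reused;
* otherwise the user is asked to Cancel, Continue with the local copy, Update (pull) when possible,
* or Reset (remove the folder and clone again).
*/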
async processCheckout(gitCloneInfo: GitCloneInfo): Promise<void> {
// Check for existing cloned repository
if (existsSync(gitCloneInfo.targetDirectory) && statSync(gitCloneInfo.targetDirectory).isDirectory()) {
const result = await this.isRepositoryUpToDate(
gitCloneInfo.targetDirectory,
gitCloneInfo.repository,
gitCloneInfo.ref,
);
if (result.ok) {
return;
}
const error = `The repository "${gitCloneInfo.repository}" appears to have already been cloned and does not match the expected configuration: ${result.error}`;
// Ask user
const selected = await window.showWarningMessage(
`${error} By continuing, the AI application may not run as expected. `,
'Cancel',
'Continue',
result.updatable ? 'Update' : 'Reset',
);
switch (selected) {
case undefined:
case 'Cancel':
throw new Error('Cancelled');
case 'Continue':
return;
case 'Update':
await this.pull(gitCloneInfo.targetDirectory);
return;
case 'Reset':
rmSync(gitCloneInfo.targetDirectory, { recursive: true });
break;
}
}
// Create folder
mkdirSync(gitCloneInfo.targetDirectory, { recursive: true });
// Clone the repository
console.log(`Cloning repository ${gitCloneInfo.repository} in ${gitCloneInfo.targetDirectory}.`);
await this.cloneRepository(gitCloneInfo);
}
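/**
* Check that the clone in `directory` matches the expected `origin` and optional `ref`:
* verifies the configured remote, the checked-out commit or tracking branch, ahead/behind counts,
* and the working-tree status. Returns { ok: true } when usable, { ok: true, updatable: true } when
* only behind the remote, or an error message describing the mismatch.
*/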
async isRepositoryUpToDate(
directory: string,
origin: string,
ref?: string,
): Promise<{ ok?: boolean; updatable?: boolean; error?: string }> {
// fetch updates
await git.fetch({
fs,
http,
dir: directory,
});
const remotes = await this.getRepositoryRemotes(directory);
if (!remotes.some(remote => remote.url === origin)) {
return {
error: `The local repository does not have remote ${origin} configured. Remotes: ${remotes
.map(remote => `${remote.remote} ${remote.url} (fetch)`)
.join(',')}`,
};
}
const branch = await git.currentBranch({
fs,
dir: directory,
});
if (!branch) {
// when the repository is detached
if (ref === undefined) {
return { error: 'The local repository is detached.' };
} else {
const tag = await this.getTagCommitId(directory, ref);
if (tag) {
ref = tag;
}
const commit = await this.getCurrentCommit(directory);
if (!commit.startsWith(ref)) {
return { error: `The local repository is detached. HEAD is ${commit} expected ${ref}.` };
}
}
}
if (branch) {
const tracking = await this.getTrackingBranch(directory, branch);
if (ref && tracking !== `origin/${ref}`) {
return {
error: `The local repository is not tracking the right branch. (tracking ${tracking} when expected ${ref})`,
};
}
const { behind, ahead } = await this.getBehindAhead(directory, branch);
if (ahead !== 0) {
return { error: `The local repository has ${ahead} commit(s) ahead.` };
}
if (behind !== 0) {
return { ok: true, updatable: true };
}
}
const status = await this.getRepositoryStatus(directory);
if (status.modified.length > 0) {
return { error: 'The local repository has modified files.' };
} else if (status.created.length > 0) {
return { error: 'The local repository has created files.' };
} else if (status.deleted.length > 0) {
return { error: 'The local repository has deleted files.' };
} else if (!status.clean) {
return { error: 'The local repository is not clean.' };
}
return { ok: true }; // If none of the error conditions are met
}
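/**
* Resolve the remote tracking branch (e.g. `origin/main`) from the `branch.<name>.remote` and
* `branch.<name>.merge` git config entries, or undefined when the branch tracks nothing.
*/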
async getTrackingBranch(directory: string, branch: string): Promise<string | undefined> {
const mergeRef = await git.getConfig({
fs,
dir: directory,
path: `branch.${branch}.merge`,
});
const remote = await git.getConfig({
fs,
dir: directory,
path: `branch.${branch}.remote`,
});
return mergeRef && remote ? `${remote}/${mergeRef.replace(/^refs\/heads\//, '')}` : undefined;
}
async getBehindAhead(dir: string, localBranch: string): Promise<{ behind: number; ahead: number }> {
const remoteBranch = await this.getTrackingBranch(dir, localBranch);
const remoteCommits = (
await git.log({
fs,
dir,
ref: remoteBranch,
})
)
.map(c => c.oid)
.sort((a, b) => a.localeCompare(b));
const localCommits = (
await git.log({
fs,
dir,
ref: localBranch,
})
)
.map(c => c.oid)
.sort((a, b) => a.localeCompare(b));
let behind = 0;
let ahead = 0;
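// Both histories are reduced to oid lists sorted ascending and consumed from the largest oid down:
// matching oids are skipped, an oid present only in the remote history counts as behind,
// and an oid present only in the local history counts as ahead.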
while (remoteCommits.length && localCommits.length) {
const remote = remoteCommits.pop();
const local = localCommits.pop();
if (!remote || !local) {
break;
}
if (remote === local) {
continue;
}
if (remote > local) {
behind++;
localCommits.push(local);
} else {
ahead++;
remoteCommits.push(remote);
}
}
return {
behind: behind + remoteCommits.length,
ahead: ahead + localCommits.length,
};
}
async getTagCommitId(directory: string, tagName: string): Promise<string | undefined> {
try {
return await git.resolveRef({
fs,
dir: directory,
ref: tagName,
});
} catch {
return undefined;
}
}
}
================================================
FILE: packages/backend/src/managers/inference/inferenceManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import {
containerEngine,
type ContainerInfo,
type ContainerInspectInfo,
type TelemetryLogger,
} from '@podman-desktop/api';
import type { ContainerRegistry } from '../../registries/ContainerRegistry';
import type { PodmanConnection } from '../podmanConnection';
import { beforeEach, describe, expect, test, vi } from 'vitest';
import { InferenceManager } from './inferenceManager';
import type { ModelsManager } from '../modelsManager';
import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { InferenceProviderRegistry } from '../../registries/InferenceProviderRegistry';
import type { InferenceProvider } from '../../workers/provider/InferenceProvider';
import type { CatalogManager } from '../catalogManager';
import type { InferenceServer } from '@shared/models/IInference';
import { InferenceType } from '@shared/models/IInference';
import { VMType } from '@shared/models/IPodman';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_INFERENCE_SERVERS_UPDATE } from '@shared/Messages';
import * as randomUtils from '../../utils/randomUtils';
import type { Task } from '@shared/models/ITask';
vi.mock('@podman-desktop/api', async () => {
return {
containerEngine: {
startContainer: vi.fn(),
stopContainer: vi.fn(),
inspectContainer: vi.fn(),
deleteContainer: vi.fn(),
listContainers: vi.fn(),
},
Disposable: {
from: vi.fn(),
create: vi.fn(),
},
};
});
vi.mock('../../utils/randomUtils');
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
const containerRegistryMock = {
onStartContainerEvent: vi.fn(),
subscribe: vi.fn(),
} as unknown as ContainerRegistry;
const podmanConnectionMock = {
onPodmanConnectionEvent: vi.fn(),
findRunningContainerProviderConnection: vi.fn(),
} as unknown as PodmanConnection;
const modelsManager = {
getLocalModelPath: vi.fn(),
uploadModelToPodmanMachine: vi.fn(),
} as unknown as ModelsManager;
const telemetryMock = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
const taskRegistryMock = {
createTask: vi.fn(),
updateTask: vi.fn(),
getTasksByLabels: vi.fn(),
} as unknown as TaskRegistry;
const inferenceProviderRegistryMock = {
getAll: vi.fn(),
getByType: vi.fn(),
get: vi.fn(),
} as unknown as InferenceProviderRegistry;
const catalogManager = {
onUpdate: vi.fn(),
} as unknown as CatalogManager;
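/**
* Helper building an InferenceManager with all collaborators mocked and waiting until init() has completed.
*/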
const getInitializedInferenceManager = async (): Promise<InferenceManager> => {
const manager = new InferenceManager(
rpcExtensionMock,
containerRegistryMock,
podmanConnectionMock,
modelsManager,
telemetryMock,
taskRegistryMock,
inferenceProviderRegistryMock,
catalogManager,
);
manager.init();
await vi.waitUntil(manager.isInitialize.bind(manager), {
interval: 200,
timeout: 2000,
});
return manager;
};
const mockListContainers = (containers: Partial<ContainerInfo>[]): void => {
vi.mocked(containerEngine.listContainers).mockResolvedValue(containers as unknown as ContainerInfo[]);
};
beforeEach(() => {
vi.resetAllMocks();
// Default listContainers is empty
mockListContainers([]);
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'running',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
vi.mocked(podmanConnectionMock.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(taskRegistryMock.getTasksByLabels).mockReturnValue([]);
vi.mocked(modelsManager.getLocalModelPath).mockReturnValue('/local/model.guff');
vi.mocked(modelsManager.uploadModelToPodmanMachine).mockResolvedValue('/mnt/path/model.guff');
});
/**
* Testing the initialization of the manager
*/
describe('init Inference Manager', () => {
test('should be initialized without catalog events', async () => {
const manager = new InferenceManager(
rpcExtensionMock,
containerRegistryMock,
podmanConnectionMock,
modelsManager,
telemetryMock,
taskRegistryMock,
inferenceProviderRegistryMock,
catalogManager,
);
manager.init();
await vi.waitUntil(manager.isInitialize.bind(manager), {
interval: 200,
timeout: 2000,
});
});
test('should have listed containers', async () => {
const inferenceManager = await getInitializedInferenceManager();
expect(inferenceManager.isInitialize()).toBeTruthy();
expect(containerEngine.listContainers).toHaveBeenCalled();
});
test('should ignore containers without the proper label', async () => {
mockListContainers([
{
Id: 'dummyId',
},
]);
const inferenceManager = await getInitializedInferenceManager();
expect(inferenceManager.getServers().length).toBe(0);
});
test('should have adopted the existing container', async () => {
mockListContainers([
{
Id: 'dummyContainerId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const inferenceManager = await getInitializedInferenceManager();
expect(inferenceManager.getServers()).toStrictEqual([
{
connection: {
port: -1,
},
container: {
containerId: 'dummyContainerId',
engineId: 'dummyEngineId',
},
health: undefined,
models: [],
status: 'running',
type: expect.anything(),
labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
});
test('should have adopted all existing container with proper label', async () => {
mockListContainers([
{
Id: 'dummyContainerId-1',
engineId: 'dummyEngineId-1',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
{
Id: 'dummyContainerId-2',
engineId: 'dummyEngineId-2',
},
{
Id: 'dummyContainerId-3',
engineId: 'dummyEngineId-3',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const inferenceManager = await getInitializedInferenceManager();
const servers = inferenceManager.getServers();
expect(servers.length).toBe(2);
expect(servers.some(server => server.container.containerId === 'dummyContainerId-1')).toBeTruthy();
expect(servers.some(server => server.container.containerId === 'dummyContainerId-3')).toBeTruthy();
});
});
/**
* Testing the creation logic
*/
describe('Create Inference Server', () => {
test('no provider available should throw an error', async () => {
vi.mocked(inferenceProviderRegistryMock.getByType).mockReturnValue([]);
const inferenceManager = await getInitializedInferenceManager();
await expect(
inferenceManager.createInferenceServer({
inferenceProvider: undefined,
labels: {},
modelsInfo: [],
port: 8888,
}),
).rejects.toThrowError('no enabled provider could be found.');
});
test('inference provider provided should use get from InferenceProviderRegistry', async () => {
vi.mocked(inferenceProviderRegistryMock.get).mockReturnValue({
enabled: () => false,
} as unknown as InferenceProvider);
const inferenceManager = await getInitializedInferenceManager();
await expect(
inferenceManager.createInferenceServer({
inferenceProvider: 'dummy-inference-provider',
labels: {},
modelsInfo: [],
port: 8888,
}),
).rejects.toThrowError('provider requested is not enabled.');
expect(inferenceProviderRegistryMock.get).toHaveBeenCalledWith('dummy-inference-provider');
});
test('selected inference provider should receive config', async () => {
const provider: InferenceProvider = {
enabled: () => true,
name: 'dummy-inference-provider',
dispose: () => {},
prePerform: vi.fn().mockReturnValue(Promise.resolve()),
perform: vi.fn<() => InferenceServer>().mockResolvedValue({
container: {
containerId: 'dummy-container-id',
engineId: 'dummy-engine-id',
},
models: [],
status: 'running',
type: InferenceType.LLAMA_CPP,
connection: { port: 0 },
labels: {},
}),
} as unknown as InferenceProvider;
vi.mocked(inferenceProviderRegistryMock.get).mockReturnValue(provider);
const inferenceManager = await getInitializedInferenceManager();
const config: InferenceServerConfig = {
inferenceProvider: 'dummy-inference-provider',
labels: {},
modelsInfo: [],
port: 8888,
};
const result = await inferenceManager.createInferenceServer(config);
expect(provider.perform).toHaveBeenCalledWith(config);
expect(result).toBe('dummy-container-id');
});
});
/**
* Testing the starting logic
*/
describe('Start Inference Server', () => {
test('containerId unknown', async () => {
const inferenceManager = await getInitializedInferenceManager();
await expect(inferenceManager.startInferenceServer('unknownContainerId')).rejects.toThrowError(
'cannot find a corresponding server for container id unknownContainerId.',
);
});
test('valid containerId', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.startInferenceServer('dummyId');
expect(containerEngine.startContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId');
const servers = inferenceManager.getServers();
expect(servers.length).toBe(1);
expect(servers[0].status).toBe('running');
});
});
/**
* Testing the stopping logic
*/
describe('Stop Inference Server', () => {
test('containerId unknown', async () => {
const inferenceManager = await getInitializedInferenceManager();
await expect(inferenceManager.stopInferenceServer('unknownContainerId')).rejects.toThrowError(
'cannot find a corresponding server for container id unknownContainerId.',
);
});
test('valid containerId', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.stopInferenceServer('dummyId');
expect(containerEngine.stopContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId');
const servers = inferenceManager.getServers();
expect(servers.length).toBe(1);
expect(servers[0].status).toBe('stopped');
});
});
describe('Delete Inference Server', () => {
test('containerId unknown', async () => {
const inferenceManager = await getInitializedInferenceManager();
await expect(inferenceManager.deleteInferenceServer('unknownContainerId')).rejects.toThrowError(
'cannot find a corresponding server for container id unknownContainerId.',
);
});
test('valid running containerId', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.deleteInferenceServer('dummyId');
expect(containerEngine.stopContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId');
expect(containerEngine.deleteContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId');
const servers = inferenceManager.getServers();
expect(servers.length).toBe(0);
});
test('valid stopped containerId', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'stopped',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.deleteInferenceServer('dummyId');
expect(containerEngine.stopContainer).not.toHaveBeenCalled();
expect(containerEngine.deleteContainer).toHaveBeenCalledWith('dummyEngineId', 'dummyId');
const servers = inferenceManager.getServers();
expect(servers.length).toBe(0);
});
});
describe('Request Create Inference Server', () => {
beforeEach(() => {
vi.mocked(randomUtils.getRandomString).mockReturnValue('random123');
});
test('Should return unique string identifier', async () => {
const inferenceManager = await getInitializedInferenceManager();
const identifier = inferenceManager.requestCreateInferenceServer({
port: 8888,
providerId: 'test@providerId',
image: 'quay.io/bootsy/playground:v0',
modelsInfo: [
{
id: 'dummyModelId',
file: {
file: 'dummyFile',
path: 'dummyPath',
},
},
],
} as unknown as InferenceServerConfig);
expect(identifier).toBeDefined();
expect(typeof identifier).toBe('string');
});
test('Task registry should have tasks matching unique identifier provided', async () => {
const inferenceManager = await getInitializedInferenceManager();
const identifier = inferenceManager.requestCreateInferenceServer({
port: 8888,
providerId: 'test@providerId',
image: 'quay.io/bootsy/playground:v0',
modelsInfo: [
{
id: 'dummyModelId',
file: {
file: 'dummyFile',
path: 'dummyPath',
},
},
],
} as unknown as InferenceServerConfig);
expect(taskRegistryMock.createTask).toHaveBeenNthCalledWith(1, 'Creating Inference server', 'loading', {
trackingId: identifier,
});
});
test('all children tasks should be set as error when one fails', async () => {
const inferenceManager = await getInitializedInferenceManager();
vi.mocked(taskRegistryMock.createTask).mockReturnValue({
id: 'task1',
name: 'Task 1',
state: 'loading',
});
vi.spyOn(inferenceManager, 'createInferenceServer');
const otherTasks: Task[] = [
{
id: 'subtask1',
name: 'Sub task 1',
state: 'loading',
},
{
id: 'subtask2',
name: 'Sub task 2',
state: 'loading',
},
{
id: 'subtask3',
name: 'Sub task 3',
state: 'error',
},
];
vi.mocked(taskRegistryMock.getTasksByLabels).mockReturnValue(otherTasks);
vi.mocked(inferenceManager.createInferenceServer).mockRejectedValue('an error');
inferenceManager.requestCreateInferenceServer({
port: 8888,
providerId: 'test@providerId',
image: 'quay.io/bootsy/playground:v0',
modelsInfo: [
{
id: 'dummyModelId',
file: {
file: 'dummyFile',
path: 'dummyPath',
},
},
],
} as unknown as InferenceServerConfig);
await vi.waitFor(() => {
expect(taskRegistryMock.updateTask).toHaveBeenCalledTimes(3);
});
expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(1, { ...otherTasks[0], state: 'error' });
expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(2, { ...otherTasks[1], state: 'error' });
expect(taskRegistryMock.updateTask).toHaveBeenNthCalledWith(3, {
error: 'Something went wrong while trying to create an inference server an error.',
id: 'task1',
name: 'Task 1',
state: 'error',
});
});
});
describe('containerRegistry events', () => {
test('container die event', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const disposableMock = vi.fn();
const deferred = new Promise<(status: string) => void>((resolve, reject) => {
vi.mocked(containerRegistryMock.subscribe).mockImplementation((containerId, listener) => {
if (containerId !== 'dummyId') reject(new Error('invalid container id'));
else resolve(listener);
return {
dispose: disposableMock,
};
});
});
const inferenceManager = await getInitializedInferenceManager();
const listener = await deferred;
const server = inferenceManager.get('dummyId');
expect(server?.status).toBe('running');
expect(containerEngine.inspectContainer).toHaveBeenCalledOnce();
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'stopped',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
listener('die');
await vi.waitFor(() => {
expect(inferenceManager.get('dummyId')?.status).toBe('stopped');
expect(containerEngine.inspectContainer).toHaveBeenCalledTimes(2);
});
// we should not have disposed the subscriber, as the container is only stopped, not removed
expect(disposableMock).not.toHaveBeenCalled();
});
test('container remove event', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
const disposableMock = vi.fn();
const deferred = new Promise<(status: string) => void>((resolve, reject) => {
vi.mocked(containerRegistryMock.subscribe).mockImplementation((containerId, listener) => {
if (containerId !== 'dummyId') reject(new Error('invalid container id'));
else resolve(listener);
return {
dispose: disposableMock,
};
});
});
const inferenceManager = await getInitializedInferenceManager();
const listener = await deferred;
const server = inferenceManager.get('dummyId');
expect(server?.status).toBe('running');
listener('remove');
await vi.waitFor(() => {
expect(inferenceManager.get('dummyId')).toBeUndefined();
});
// we should have disposed the subscriber, as the container is removed
expect(disposableMock).toHaveBeenCalled();
});
});
describe('transition statuses', () => {
test('stopping an inference server should first set status to stopping', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'running',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.stopInferenceServer('dummyId');
// first called with stopping status
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [
{
connection: expect.anything(),
container: expect.anything(),
models: expect.anything(),
health: undefined,
status: 'stopping',
type: expect.anything(),
labels: expect.anything(),
},
]);
// finally it should have been called with status stopped
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [
{
connection: expect.anything(),
container: expect.anything(),
models: expect.anything(),
health: undefined,
status: 'stopped',
type: expect.anything(),
labels: expect.anything(),
},
]);
});
test('deleting an inference server should first set status to deleting', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'running',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.deleteInferenceServer('dummyId');
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [
{
connection: expect.anything(),
container: expect.anything(),
models: expect.anything(),
health: undefined,
status: 'deleting',
type: expect.anything(),
labels: expect.anything(),
},
]);
});
test('starting an inference server should first set status to starting', async () => {
mockListContainers([
{
Id: 'dummyId',
engineId: 'dummyEngineId',
Labels: {
[LABEL_INFERENCE_SERVER]: '[]',
},
},
]);
vi.mocked(containerEngine.inspectContainer).mockResolvedValue({
State: {
Status: 'stopped',
Health: undefined,
},
} as unknown as ContainerInspectInfo);
const inferenceManager = await getInitializedInferenceManager();
await inferenceManager.startInferenceServer('dummyId');
// first status must be set to starting
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [
{
connection: expect.anything(),
container: expect.anything(),
models: expect.anything(),
health: undefined,
status: 'starting',
type: expect.anything(),
labels: expect.anything(),
},
]);
// on success it should have been set to running
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_INFERENCE_SERVERS_UPDATE, [
{
connection: expect.anything(),
container: expect.anything(),
models: expect.anything(),
health: undefined,
status: 'running',
type: expect.anything(),
labels: expect.anything(),
},
]);
});
});
================================================
FILE: packages/backend/src/managers/inference/inferenceManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { InferenceServer, InferenceServerStatus, InferenceType } from '@shared/models/IInference';
import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection';
import { containerEngine, Disposable } from '@podman-desktop/api';
import type { ContainerInfo, TelemetryLogger, ContainerProviderConnection } from '@podman-desktop/api';
import type { ContainerRegistry, ContainerEvent } from '../../registries/ContainerRegistry';
import { getInferenceType, isTransitioning, LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import { Publisher } from '../../utils/Publisher';
import { MSG_INFERENCE_SERVERS_UPDATE } from '@shared/Messages';
import type { InferenceServerConfig } from '@shared/models/InferenceServerConfig';
import type { ModelsManager } from '../modelsManager';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import { getRandomString } from '../../utils/randomUtils';
import { basename, dirname } from 'node:path';
import type { InferenceProviderRegistry } from '../../registries/InferenceProviderRegistry';
import type { InferenceProvider } from '../../workers/provider/InferenceProvider';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { CatalogManager } from '../catalogManager';
import { getHash } from '../../utils/sha';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { TaskRunner } from '../TaskRunner';
export class InferenceManager extends Publisher<InferenceServer[]> implements Disposable {
// Inference server map (containerId -> InferenceServer)
#servers: Map<string, InferenceServer>;
// Is initialized
#initialized: boolean;
// Disposables
#disposables: Disposable[];
#taskRunner: TaskRunner;
constructor(
rpcExtension: RpcExtension,
private containerRegistry: ContainerRegistry,
private podmanConnection: PodmanConnection,
private modelsManager: ModelsManager,
private telemetry: TelemetryLogger,
private taskRegistry: TaskRegistry,
private inferenceProviderRegistry: InferenceProviderRegistry,
private catalogManager: CatalogManager,
) {
super(rpcExtension, MSG_INFERENCE_SERVERS_UPDATE, () => this.getServers());
this.#servers = new Map();
this.#disposables = [];
this.#initialized = false;
this.#taskRunner = new TaskRunner(this.taskRegistry);
}
init(): void {
this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this));
this.containerRegistry.onStartContainerEvent(this.watchContainerStart.bind(this));
this.catalogManager.onUpdate(() => {
this.retryableRefresh(1);
});
this.retryableRefresh(3);
}
public isInitialize(): boolean {
return this.#initialized;
}
/**
* Cleanup the manager
*/
dispose(): void {
this.cleanDisposables();
this.#servers.clear();
this.#initialized = false;
}
/**
* Clean class disposables
*/
private cleanDisposables(): void {
this.#disposables.forEach(disposable => disposable.dispose());
}
/**
* Get the Inference servers
*/
public getServers(): InferenceServer[] {
return Array.from(this.#servers.values());
}
/**
* Get the Unique registered Inference provider types
*/
public getRegisteredProviders(): InferenceType[] {
const types: InferenceType[] = this.inferenceProviderRegistry.getAll().map(provider => provider.type);
return [...new Set(types)];
}
/**
* return an inference server
* @param containerId the containerId of the inference server
*/
public get(containerId: string): InferenceServer | undefined {
return this.#servers.get(containerId);
}
/**
* Return the first inference server that is using the given model.
* Throws if no enabled provider for the model's backend is found.
*/
public findServerByModel(model: ModelInfo): InferenceServer | undefined {
// check if model backend is supported
const backend: InferenceType = getInferenceType([model]);
const providers: InferenceProvider[] = this.inferenceProviderRegistry
.getByType(backend)
.filter(provider => provider.enabled());
if (providers.length === 0) {
throw new Error('no enabled provider could be found.');
}
return this.getServers().find(s => s.models.some(m => m.id === model.id));
}
/**
* Creating an inference server can be a heavy task (pulling an image, uploading a model to WSL, etc.).
* The frontend cannot wait endlessly, therefore we provide a method returning a tracking identifier
* that can be used to fetch the tasks
*
* @param config the config to use to create the inference server
*
* @return a unique tracking identifier to follow the creation request
*/
requestCreateInferenceServer(config: InferenceServerConfig): string {
// create a tracking id to put in the labels
const trackingId: string = getRandomString();
config.labels = {
...config.labels,
trackingId: trackingId,
};
this.#taskRunner
.runAsTask(
{
trackingId: trackingId,
},
{
loadingLabel: 'Creating Inference server',
errorMsg: err => `Something went wrong while trying to create an inference server ${String(err)}.`,
failFastSubtasks: true,
},
async ({ updateLabels }) => {
const containerId = await this.createInferenceServer(config);
updateLabels(labels => ({ ...labels, containerId }));
},
)
.catch(() => {});
return trackingId;
}
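/*
 * Usage sketch (illustrative only, not part of the manager): a caller typically keeps the
 * returned tracking identifier and queries the TaskRegistry for the tasks labelled with it,
 * much like the unit tests above do. The `taskRegistry` instance below is an assumption for
 * the example.
 *
 *   const trackingId = inferenceManager.requestCreateInferenceServer(config);
 *   const tasks = taskRegistry.getTasksByLabels({ trackingId });
 *   const failed = tasks.some(task => task.state === 'error');
 *
 * Once creation succeeds, the main task labels also gain a `containerId` entry
 * (see updateLabels above), which lets the frontend link the request to the new server.
 */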
/**
* Given a configuration, create an inference server using an InferenceProvider.
* @param config
*
* @return the containerId of the created inference server
*/
async createInferenceServer(config: InferenceServerConfig): Promise<string> {
if (!this.isInitialize()) throw new Error('Cannot start the inference server: not initialized.');
// Get the backend for the model inference server {@link InferenceType}
const backend: InferenceType = getInferenceType(config.modelsInfo);
let provider: InferenceProvider;
if (config.inferenceProvider) {
provider = this.inferenceProviderRegistry.get(config.inferenceProvider);
if (!provider.enabled()) throw new Error('provider requested is not enabled.');
} else {
const providers: InferenceProvider[] = this.inferenceProviderRegistry
.getByType(backend)
.filter(provider => provider.enabled());
if (providers.length === 0) throw new Error('no enabled provider could be found.');
provider = providers[0];
}
let connection: ContainerProviderConnection | undefined = undefined;
if (config.connection) {
connection = this.podmanConnection.getContainerProviderConnection(config.connection);
} else {
connection = this.podmanConnection.findRunningContainerProviderConnection();
}
if (!connection) throw new Error('cannot find running container provider connection');
await provider.prePerform(config);
// upload models to podman machine if user system is supported
config.modelsInfo = await Promise.all(
config.modelsInfo.map(modelInfo =>
this.modelsManager.uploadModelToPodmanMachine(connection, modelInfo, config.labels).then(path => ({
...modelInfo,
file: {
path: dirname(path),
file: basename(path),
},
})),
),
);
// create the inference server using the selected inference provider
const inferenceServer = await provider.perform(config);
// Adding a new inference server
this.#servers.set(inferenceServer.container.containerId, inferenceServer);
// Watch for container changes
this.watchContainerStatus(inferenceServer.container.engineId, inferenceServer.container.containerId);
// Log usage
this.telemetry.logUsage('inference.start', {
models: config.modelsInfo.map(model => getHash(model.id)),
});
this.notify();
return inferenceServer.container.containerId;
}
/**
* Given an engineId and a containerId, inspect the container and update the servers
* @param engineId
* @param containerId
* @private
*/
private updateServerStatus(engineId: string, containerId: string): void {
const server = this.#servers.get(containerId);
if (server === undefined)
throw new Error('Something went wrong while trying to get container status got undefined Inference Server.');
// we should not update the server while we are in a transition state.
if (isTransitioning(server)) return;
// Inspect container
containerEngine
.inspectContainer(engineId, containerId)
.then(result => {
// Update server
this.#servers.set(containerId, {
...server,
status: result.State.Status === 'running' ? 'running' : 'stopped',
health: result.State.Health,
});
this.notify();
})
.catch((err: unknown) => {
console.error(
`Something went wrong while trying to inspect container ${containerId}. Trying to refresh servers.`,
err,
);
this.retryableRefresh(2);
});
}
/**
* Watch for container status changes
* @param engineId
* @param containerId the container to watch
*/
private watchContainerStatus(engineId: string, containerId: string): void {
// Update now
this.updateServerStatus(engineId, containerId);
// Create a polling update for the container health check
const intervalId = setInterval(this.updateServerStatus.bind(this, engineId, containerId), 10000);
this.#disposables.push(
Disposable.create(() => {
clearInterval(intervalId);
}),
);
// Subscribe to container status update
const disposable = this.containerRegistry.subscribe(containerId, (status: string) => {
switch (status) {
case 'die':
this.updateServerStatus(engineId, containerId);
clearInterval(intervalId);
break;
case 'remove':
// Update the list of servers
this.removeInferenceServer(containerId);
disposable.dispose();
clearInterval(intervalId);
break;
}
});
// Allowing cleanup if extension is stopped
this.#disposables.push(disposable);
}
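/*
 * Lifecycle sketch (illustrative): a watched container is refreshed from two sources.
 *
 *   every 10 s      -> updateServerStatus(engineId, containerId)   (polling interval)
 *   'die' event     -> updateServerStatus(...), polling interval cleared
 *   'remove' event  -> removeInferenceServer(...), subscription disposed, interval cleared
 *
 * Both the interval wrapper and the subscription are pushed to #disposables, so dispose()
 * can clean them up when the extension is stopped.
 */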
private watchMachineEvent(_event: PodmanConnectionEvent): void {
this.retryableRefresh(2);
}
/**
* Listener for container start events
* @param event the event containing the id of the container
*/
private watchContainerStart(event: ContainerEvent): void {
// We might have a start event for an inference server we already know about
if (this.#servers.has(event.id)) return;
containerEngine
.listContainers()
.then(containers => {
const container = containers.find(c => c.Id === event.id);
if (container === undefined) {
return;
}
if (container.Labels && LABEL_INFERENCE_SERVER in container.Labels) {
this.watchContainerStatus(container.engineId, container.Id);
}
})
.catch((err: unknown) => {
console.error(`Something went wrong in container start listener.`, err);
});
}
/**
* This non-async utility method retries refreshing the inference servers with some delay
* when an error is raised.
*
* @param retry the number of retry allowed
*/
private retryableRefresh(retry: number = 3): void {
if (retry === 0) {
console.error('Cannot refresh inference servers: retry limit has been reached. Cleaning manager.');
this.cleanDisposables();
this.#servers.clear();
this.#initialized = false;
return;
}
this.refreshInferenceServers().catch((err: unknown): void => {
console.warn(`Something went wrong while trying to refresh inference server. (retry left ${retry})`, err);
setTimeout(
() => {
this.retryableRefresh(retry - 1);
},
// eslint-disable-next-line sonarjs/pseudo-random
2000 + Math.random() * 1000,
);
});
}
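/*
 * Timing sketch (illustrative): with the default retry = 3, a failing refresh is attempted up
 * to three times, each retry being scheduled 2000 + random(0..1000) ms after the previous
 * failure. The manager therefore gives up after roughly 6 to 9 seconds of consecutive errors
 * and clears its state until the next podman or catalog event triggers a new refresh.
 */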
/**
* Refresh the inference servers by listing all containers.
*
* This method has an important impact as it (re-)creates all inference servers.
*/
private async refreshInferenceServers(): Promise<void> {
const containers: ContainerInfo[] = await containerEngine.listContainers();
const filtered = containers.filter(c => c.Labels && LABEL_INFERENCE_SERVER in c.Labels);
// clean existing disposables
this.cleanDisposables();
this.#servers = new Map(
filtered.map(containerInfo => {
let modelInfos: ModelInfo[] = [];
try {
const modelIds: string[] = JSON.parse(containerInfo.Labels[LABEL_INFERENCE_SERVER]);
modelInfos = modelIds
.filter(id => this.modelsManager.isModelOnDisk(id))
.map(id => this.modelsManager.getModelInfo(id));
} catch (err: unknown) {
console.error('Something went wrong while getting the models ids from the label.', err);
}
return [
containerInfo.Id,
{
container: {
containerId: containerInfo.Id,
engineId: containerInfo.engineId,
},
connection: {
port: !!containerInfo.Ports && containerInfo.Ports.length > 0 ? containerInfo.Ports[0].PublicPort : -1,
},
status: containerInfo.Status === 'running' ? 'running' : 'stopped',
models: modelInfos,
type: getInferenceType(modelInfos),
labels: containerInfo.Labels || {},
},
];
}),
);
// (re-)create container watchers
this.#servers.forEach(server => this.watchContainerStatus(server.container.engineId, server.container.containerId));
this.#initialized = true;
// notify update
this.notify();
}
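/*
 * Label format sketch (values are illustrative): the label keyed by LABEL_INFERENCE_SERVER is
 * expected to hold a JSON array of model ids, e.g.
 *
 *   Labels: { [LABEL_INFERENCE_SERVER]: '["hf.TheBloke.mistral-7b-v0.1"]' }
 *
 * Each id is resolved through the ModelsManager; ids whose files are not on disk are dropped,
 * and a container without a published port ends up with connection.port === -1.
 */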
/**
* Remove the reference of the inference server
* /!\ Does not delete the corresponding container
* @param containerId
*/
private removeInferenceServer(containerId: string): void {
this.#servers.delete(containerId);
this.notify();
}
/**
* Delete the InferenceServer instance from #servers and matching container
* @param containerId the id of the container running the Inference Server
*/
async deleteInferenceServer(containerId: string): Promise<void> {
const server = this.#servers.get(containerId);
if (!server) {
throw new Error(`cannot find a corresponding server for container id ${containerId}.`);
}
try {
// Set status to deleting
this.setInferenceServerStatus(server.container.containerId, 'deleting');
// If the server is running we need to stop it.
if (server.status === 'running') {
await containerEngine.stopContainer(server.container.engineId, server.container.containerId);
}
// Delete the container
await containerEngine.deleteContainer(server.container.engineId, server.container.containerId);
// Delete the reference
this.removeInferenceServer(containerId);
} catch (err: unknown) {
console.error('Something went wrong while trying to delete the inference server.', err);
this.setInferenceServerStatus(server.container.containerId, 'error');
this.retryableRefresh(2);
}
}
/**
* Start an inference server from the container id
* @param containerId the identifier of the container to start
*/
async startInferenceServer(containerId: string): Promise<void> {
if (!this.isInitialize()) throw new Error('Cannot start the inference server.');
const server = this.#servers.get(containerId);
if (server === undefined) throw new Error(`cannot find a corresponding server for container id ${containerId}.`);
try {
// set status to starting
this.setInferenceServerStatus(server.container.containerId, 'starting');
await containerEngine.startContainer(server.container.engineId, server.container.containerId);
this.setInferenceServerStatus(server.container.containerId, 'running');
// start watch for container status update
this.watchContainerStatus(server.container.engineId, server.container.containerId);
} catch (error: unknown) {
console.error(error);
this.telemetry.logError('inference.start', {
message: 'error starting inference',
error: error,
});
this.setInferenceServerStatus(server.container.containerId, 'error');
this.retryableRefresh(1);
}
}
/**
* Stop an inference server from the container id
* @param containerId the identifier of the container to stop
*/
async stopInferenceServer(containerId: string): Promise<void> {
if (!this.isInitialize()) throw new Error('Cannot stop the inference server.');
const server = this.#servers.get(containerId);
if (server === undefined) throw new Error(`cannot find a corresponding server for container id ${containerId}.`);
if (isTransitioning(server)) throw new Error(`cannot stop a transitioning server.`);
try {
// set server to stopping
this.setInferenceServerStatus(server.container.containerId, 'stopping');
await containerEngine.stopContainer(server.container.engineId, server.container.containerId);
// once stopped update the status
this.setInferenceServerStatus(server.container.containerId, 'stopped');
} catch (error: unknown) {
console.error(error);
this.telemetry.logError('inference.stop', {
message: 'error stopping inference',
error: error,
});
this.setInferenceServerStatus(server.container.containerId, 'error');
this.retryableRefresh(1);
}
}
/**
* Given a containerId, set the status of the corresponding inference server
* @param containerId
* @param status
*/
private setInferenceServerStatus(containerId: string, status: InferenceServerStatus): void {
const server = this.#servers.get(containerId);
if (server === undefined) throw new Error(`cannot find a corresponding server for container id ${containerId}.`);
this.#servers.set(server.container.containerId, {
...server,
status: status,
health: undefined, // always reset health history when changing status
});
this.notify();
}
}
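/*
 * Wiring sketch (illustrative, assuming the dependencies are created elsewhere in the
 * extension): the manager is constructed with its registries and managers, initialized once,
 * and disposed when the extension deactivates.
 *
 *   const inferenceManager = new InferenceManager(
 *     rpcExtension,
 *     containerRegistry,
 *     podmanConnection,
 *     modelsManager,
 *     telemetryLogger,
 *     taskRegistry,
 *     inferenceProviderRegistry,
 *     catalogManager,
 *   );
 *   inferenceManager.init();
 *   // ...
 *   inferenceManager.dispose();
 */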
================================================
FILE: packages/backend/src/managers/instructlab/instructlabManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { TaskRegistry } from '../../registries/TaskRegistry';
import { beforeAll, beforeEach, expect, test, vi } from 'vitest';
import type { ContainerCreateResult, ContainerInfo, ImageInfo, TelemetryLogger } from '@podman-desktop/api';
import { containerEngine, EventEmitter } from '@podman-desktop/api';
import type { PodmanConnection } from '../podmanConnection';
import { INSTRUCTLAB_CONTAINER_LABEL, InstructlabManager } from './instructlabManager';
import { ContainerRegistry } from '../../registries/ContainerRegistry';
import { TestEventEmitter } from '../../tests/utils';
import { VMType } from '@shared/models/IPodman';
import type { Task } from '@shared/models/ITask';
import instructlab_images from '../../assets/instructlab-images.json';
import { INSTRUCTLAB_CONTAINER_TRACKINGID } from '@shared/models/instructlab/IInstructlabContainerInfo';
import type { RpcExtension } from '@shared/messages/MessageProxy';
vi.mock('@podman-desktop/api', () => {
return {
EventEmitter: vi.fn(),
containerEngine: {
listContainers: vi.fn(),
listImages: vi.fn(),
createContainer: vi.fn(),
onEvent: vi.fn(),
},
};
});
const taskRegistry = new TaskRegistry({ fire: vi.fn().mockResolvedValue(true) } as unknown as RpcExtension);
const podmanConnection: PodmanConnection = {
onPodmanConnectionEvent: vi.fn(),
findRunningContainerProviderConnection: vi.fn(),
} as unknown as PodmanConnection;
const telemetryMock = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
let instructlabManager: InstructlabManager;
beforeAll(() => {
vi.mocked(EventEmitter).mockImplementation(() => new TestEventEmitter() as unknown as EventEmitter);
});
beforeEach(() => {
const containerRegistry = new ContainerRegistry();
containerRegistry.init();
instructlabManager = new InstructlabManager('', taskRegistry, podmanConnection, containerRegistry, telemetryMock);
instructlabManager.init();
taskRegistry.deleteByLabels({ trackingId: INSTRUCTLAB_CONTAINER_TRACKINGID });
});
test('getInstructLabContainer should return undefined if no containers', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
const containerId = await instructlabManager.getInstructLabContainer();
expect(containerId).toBeUndefined();
});
test('getInstructLabContainer should return undefined if no instructlab container', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([{ Id: 'dummyId' } as unknown as ContainerInfo]);
const containerId = await instructlabManager.getInstructLabContainer();
expect(containerId).toBeUndefined();
});
test('getInstructLabContainer should return id if instructlab container', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([
{
Id: 'dummyId',
State: 'running',
Labels: { [`${INSTRUCTLAB_CONTAINER_LABEL}`]: 'dummyLabel' },
} as unknown as ContainerInfo,
]);
const containerId = await instructlabManager.getInstructLabContainer();
expect(containerId).toBe('dummyId');
});
test('requestCreateInstructlabContainer throws error if no podman connection', async () => {
const containerIdPromise = instructlabManager.requestCreateInstructlabContainer({});
await expect(containerIdPromise).rejects.toBeInstanceOf(Error);
});
async function waitTasks(id: string, nb: number): Promise<Task[]> {
return vi.waitFor(() => {
const tasks = taskRegistry.getTasksByLabels({ trackingId: id });
if (tasks.length !== nb) {
throw new Error('not completed');
}
return tasks;
});
}
test('requestCreateInstructlabContainer returns id and error if listImage returns error', async () => {
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockRejectedValue(new Error());
await instructlabManager.requestCreateInstructlabContainer({});
const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 2);
expect(tasks.some(task => task.state === 'error')).toBeTruthy();
});
test('requestCreateInstructlabContainer returns id and error if listImage returns image', async () => {
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{ RepoTags: [instructlab_images.default] } as unknown as ImageInfo,
]);
await instructlabManager.requestCreateInstructlabContainer({});
const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 3);
expect(tasks.some(task => task.state === 'error')).toBeTruthy();
});
test('requestCreateInstructlabContainer returns id and no error if createContainer returns id', async () => {
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{ RepoTags: [instructlab_images.default] } as unknown as ImageInfo,
]);
vi.mocked(containerEngine.createContainer).mockResolvedValue({
id: 'containerId',
} as unknown as ContainerCreateResult);
await instructlabManager.requestCreateInstructlabContainer({});
const tasks = await waitTasks(INSTRUCTLAB_CONTAINER_TRACKINGID, 3);
expect(tasks.some(task => task.state === 'error')).toBeFalsy();
});
================================================
FILE: packages/backend/src/managers/instructlab/instructlabManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { InstructlabSession } from '@shared/models/instructlab/IInstructlabSession';
import type { InstructlabContainerConfiguration } from '@shared/models/instructlab/IInstructlabContainerConfiguration';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import {
type TelemetryLogger,
containerEngine,
type ContainerProviderConnection,
type ContainerCreateOptions,
type Disposable,
} from '@podman-desktop/api';
import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection';
import instructlab_images from '../../assets/instructlab-images.json';
import { getImageInfo } from '../../utils/inferenceUtils';
import path from 'node:path';
import fs from 'node:fs/promises';
import type { ContainerRegistry, ContainerEvent } from '../../registries/ContainerRegistry';
import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
import { INSTRUCTLAB_CONTAINER_TRACKINGID } from '@shared/models/instructlab/IInstructlabContainerInfo';
import { getRandomName } from '../../utils/randomUtils';
export const INSTRUCTLAB_CONTAINER_LABEL = 'ai-lab-instructlab-container';
export class InstructlabManager implements Disposable {
#initialized: boolean;
#containerId: string | undefined;
#disposables: Disposable[];
constructor(
private readonly appUserDirectory: string,
private taskRegistry: TaskRegistry,
private podmanConnection: PodmanConnection,
private containerRegistry: ContainerRegistry,
private telemetryLogger: TelemetryLogger,
) {
this.#initialized = false;
this.#disposables = [];
}
init(): void {
this.#disposables.push(this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this)));
this.#disposables.push(this.containerRegistry.onStartContainerEvent(this.onStartContainerEvent.bind(this)));
this.#disposables.push(this.containerRegistry.onStopContainerEvent(this.onStopContainerEvent.bind(this)));
}
dispose(): void {
this.#disposables.forEach(disposable => disposable.dispose());
this.#disposables = [];
}
private async refreshInstructlabContainer(id?: string): Promise<void> {
const containers = await containerEngine.listContainers();
const containerId = containers
.filter(c => !id || c.Id === id)
.filter(c => c.State === 'running' && c.Labels && INSTRUCTLAB_CONTAINER_LABEL in c.Labels)
.map(c => c.Id)
.at(0);
if ((id && containerId) || !id) {
this.#containerId = containerId;
}
}
private async watchMachineEvent(event: PodmanConnectionEvent): Promise<void> {
if ((event.status === 'started' && !this.#containerId) || (event.status === 'stopped' && this.#containerId)) {
await this.refreshInstructlabContainer();
}
}
private async onStartContainerEvent(event: ContainerEvent): Promise<void> {
await this.refreshInstructlabContainer(event.id);
}
private onStopContainerEvent(event: ContainerEvent): void {
console.log('event id:', event.id, ' containerId: ', this.#containerId);
if (this.#containerId === event.id) {
this.#containerId = undefined;
this.taskRegistry.deleteByLabels({ trackingId: INSTRUCTLAB_CONTAINER_TRACKINGID });
}
}
public getSessions(): InstructlabSession[] {
return [
{
name: 'session 1',
modelId: 'hf.facebook.detr-resnet-101',
targetModel: 'hf.facebook.detr-resnet-101-target',
repository: '/a1',
status: 'fine-tuned',
createdTime: new Date(new Date().getTime() - 6 * 24 * 60 * 60 * 1000).getTime() / 1000, // 6 days ago
},
{
name: 'session 2',
modelId: 'hf.ibm-granite.granite-8b-code-instruct',
targetModel: 'hf.ibm-granite.granite-8b-code-instruct-target',
repository: '/a2',
status: 'generating-instructions',
createdTime: new Date(new Date().getTime() - 4 * 60 * 60 * 1000).getTime() / 1000, // 4 hours ago
},
];
}
async getInstructLabContainer(): Promise<string | undefined> {
if (!this.#initialized) {
const containers = await containerEngine.listContainers();
this.#containerId = containers
.filter(c => c.State === 'running' && c.Labels && INSTRUCTLAB_CONTAINER_LABEL in c.Labels)
.map(c => c.Id)
.at(0);
this.#initialized = true;
}
return this.#containerId;
}
async requestCreateInstructlabContainer(config: InstructlabContainerConfiguration): Promise<void> {
// create a tracking id to put in the labels
const trackingId: string = INSTRUCTLAB_CONTAINER_TRACKINGID;
const labels = {
trackingId: trackingId,
};
const task = this.taskRegistry.createTask('Creating InstructLab container', 'loading', {
trackingId: trackingId,
});
let connection: ContainerProviderConnection | undefined;
if (config.connection) {
connection = this.podmanConnection.getContainerProviderConnection(config.connection);
} else {
connection = this.podmanConnection.findRunningContainerProviderConnection();
}
if (!connection) throw new Error('cannot find running container provider connection');
this.createInstructlabContainer(connection, labels)
.then((containerId: string) => {
this.#containerId = containerId;
this.taskRegistry.updateTask({
...task,
state: 'success',
labels: {
...task.labels,
containerId: containerId,
},
});
this.telemetryLogger.logUsage('instructlab.startContainer');
})
.catch((err: unknown) => {
// Get all tasks using the tracker
const tasks = this.taskRegistry.getTasksByLabels({
trackingId: trackingId,
});
// Select the tasks still in loading state (other than the main one)
tasks
.filter(t => t.state === 'loading' && t.id !== task.id)
.forEach(t => {
this.taskRegistry.updateTask({
...t,
state: 'error',
});
});
// Update the main task
this.taskRegistry.updateTask({
...task,
state: 'error',
error: `Something went wrong while trying to create the InstructLab container ${String(err)}.`,
});
this.telemetryLogger.logError('instructlab.startContainer', { error: err });
});
}
async createInstructlabContainer(
connection: ContainerProviderConnection,
labels: { [p: string]: string },
): Promise<string> {
const image = instructlab_images.default;
const pullingTask = this.taskRegistry.createTask(`Pulling ${image}.`, 'loading', labels);
const imageInfo = await getImageInfo(connection, image, () => {})
.catch((err: unknown) => {
pullingTask.state = 'error';
pullingTask.progress = undefined;
pullingTask.error = `Something went wrong while pulling ${image}: ${String(err)}`;
throw err;
})
.then(imageInfo => {
pullingTask.state = 'success';
pullingTask.progress = undefined;
return imageInfo;
})
.finally(() => {
this.taskRegistry.updateTask(pullingTask);
});
const folder = await this.getInstructLabContainerFolder();
const containerTask = this.taskRegistry.createTask('Starting InstructLab container', 'loading', labels);
const createContainerOptions: ContainerCreateOptions = {
Image: imageInfo.Id,
name: getRandomName('instructlab'),
Labels: { [INSTRUCTLAB_CONTAINER_LABEL]: image },
HostConfig: {
AutoRemove: true,
SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION],
Mounts: [
{
Target: '/instructlab/.cache/instructlab',
Source: path.join(folder, '.cache'),
Type: 'bind',
},
{
Target: '/instructlab/.config/instructlab',
Source: path.join(folder, '.config'),
Type: 'bind',
},
{
Target: '/instructlab/.local/share/instructlab',
Source: path.join(folder, '.local'),
Type: 'bind',
},
],
UsernsMode: 'keep-id:uid=1000,gid=1000',
},
OpenStdin: true,
start: true,
};
try {
const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions);
// update the task
containerTask.state = 'success';
containerTask.progress = undefined;
return id;
} catch (err: unknown) {
containerTask.state = 'error';
containerTask.progress = undefined;
containerTask.error = `Something went wrong while creating container: ${String(err)}`;
throw err;
} finally {
this.taskRegistry.updateTask(containerTask);
}
}
private async getInstructLabContainerFolder(): Promise<string> {
const instructlabPath = path.join(this.appUserDirectory, 'instructlab', 'container');
await fs.mkdir(instructlabPath, { recursive: true });
await fs.mkdir(path.join(instructlabPath, '.cache'), { recursive: true });
await fs.mkdir(path.join(instructlabPath, '.config'), { recursive: true });
await fs.mkdir(path.join(instructlabPath, '.local'), { recursive: true });
return instructlabPath;
}
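/*
 * Host layout sketch (illustrative, rooted at appUserDirectory): the folders created above are
 * bind-mounted into the InstructLab container by createInstructlabContainer:
 *
 *   <appUserDirectory>/instructlab/container/.cache   -> /instructlab/.cache/instructlab
 *   <appUserDirectory>/instructlab/container/.config  -> /instructlab/.config/instructlab
 *   <appUserDirectory>/instructlab/container/.local   -> /instructlab/.local/share/instructlab
 */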
}
================================================
FILE: packages/backend/src/managers/llama-stack/llamaStackManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { TaskRegistry } from '../../registries/TaskRegistry';
import { assert, beforeEach, expect, test, vi } from 'vitest';
import type { ContainerCreateResult, ContainerInfo, Disposable, ImageInfo, TelemetryLogger } from '@podman-desktop/api';
import { containerEngine } from '@podman-desktop/api';
import type { PodmanConnection } from '../podmanConnection';
import type { ContainerRegistry } from '../../registries/ContainerRegistry';
import { VMType } from '@shared/models/IPodman';
import type { Task } from '@shared/models/ITask';
import llama_stack_images from '../../assets/llama-stack-images.json';
import llama_stack_playground_images from '../../assets/llama-stack-playground-images.json';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import {
LLAMA_STACK_API_PORT_LABEL,
LLAMA_STACK_CONTAINER_LABEL,
LLAMA_STACK_PLAYGROUND_PORT_LABEL,
LlamaStackManager,
} from './llamaStackManager';
import {
LLAMA_STACK_CONTAINER_TRACKINGID,
type LlamaStackContainers,
} from '@shared/models/llama-stack/LlamaStackContainerInfo';
import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry';
import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration';
import type { ModelsManager } from '../modelsManager';
import * as utilsPorts from '../../utils/ports';
vi.mock('@podman-desktop/api', () => {
return {
EventEmitter: vi.fn(),
containerEngine: {
listContainers: vi.fn(),
listImages: vi.fn(),
createContainer: vi.fn(),
onEvent: vi.fn(),
pullImage: vi.fn(),
inspectContainer: vi.fn(),
startContainer: vi.fn(),
stopContainer: vi.fn(),
deleteContainer: vi.fn(),
},
env: {
isWindows: false,
},
};
});
vi.mock('../../utils/ports');
class TestLlamaStackManager extends LlamaStackManager {
public override async refreshLlamaStackContainers(): Promise<void> {
return super.refreshLlamaStackContainers();
}
public override getContainersInfo(): LlamaStackContainers | undefined {
return super.getContainersInfo();
}
}
const podmanConnection: PodmanConnection = {
onPodmanConnectionEvent: vi.fn(),
findRunningContainerProviderConnection: vi.fn(),
execute: vi.fn(),
} as unknown as PodmanConnection;
const containerRegistry = {
onStartContainerEvent: vi.fn(),
onStopContainerEvent: vi.fn(),
onHealthyContainerEvent: vi.fn(),
} as unknown as ContainerRegistry;
const configurationRegistry = {
getExtensionConfiguration: vi.fn(),
} as unknown as ConfigurationRegistry;
const telemetryMock = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
const modelsManagerMock = {
getModelsInfo: vi.fn(),
} as unknown as ModelsManager;
let taskRegistry: TaskRegistry;
let llamaStackManager: TestLlamaStackManager;
const LLAMA_STACK_CONTAINER_RUNNING = {
Id: 'dummyId',
State: 'running',
Labels: {
[LLAMA_STACK_CONTAINER_LABEL]: 'dummyLabel',
[LLAMA_STACK_API_PORT_LABEL]: '50000',
},
} as unknown as ContainerInfo;
const LLAMA_STACK_CONTAINER_STOPPED = {
Id: 'dummyId',
State: 'stopped',
} as unknown as ContainerInfo;
const NON_LLAMA_STACK_CONTAINER = { Id: 'dummyId' } as unknown as ContainerInfo;
const NO_OP_DISPOSABLE = {
dispose: (): void => {},
} as Disposable;
beforeEach(() => {
vi.resetAllMocks();
taskRegistry = new TaskRegistry({ fire: vi.fn().mockResolvedValue(true) } as unknown as RpcExtension);
llamaStackManager = new TestLlamaStackManager(
'',
taskRegistry,
podmanConnection,
containerRegistry,
configurationRegistry,
telemetryMock,
modelsManagerMock,
);
});
test('getLlamaStackContainers should return undefined if no containers', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
const stack_containers = await llamaStackManager.getLlamaStackContainers();
expect(stack_containers).toEqual({ server: undefined, playground: undefined });
});
test('getLlamaStackContainers should return undefined if no llama stack container', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([NON_LLAMA_STACK_CONTAINER]);
const stack_containers = await llamaStackManager.getLlamaStackContainers();
expect(stack_containers).toEqual({ server: undefined, playground: undefined });
});
test('getLlamaStackContainers should return server info if llama stack server container', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([LLAMA_STACK_CONTAINER_RUNNING]);
const containerInfo = await llamaStackManager.getLlamaStackContainers();
expect(containerInfo).toEqual({
server: { containerId: 'dummyId', port: 50000, state: 'running' },
playground: undefined,
});
});
test('requestcreateLlamaStackContainerss throws error if no podman connection', async () => {
const containerIdPromise = llamaStackManager.requestcreateLlamaStackContainerss({});
await expect(containerIdPromise).rejects.toBeInstanceOf(Error);
});
async function waitTasks(id: string, nb: number): Promise<Task[]> {
return vi.waitFor(() => {
const tasks = taskRegistry.getTasksByLabels({ trackingId: id });
if (tasks.length < nb) {
throw new Error('not completed');
}
return tasks.slice(0, nb);
});
}
test('requestcreateLlamaStackContainerss returns id and error if listImage returns error', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockRejectedValue(new Error());
await llamaStackManager.requestcreateLlamaStackContainerss({});
const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 2);
expect(tasks.some(task => task.state === 'error')).toBeTruthy();
});
test('requestcreateLlamaStackContainerss returns id and error if listImage returns image', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{ RepoTags: [llama_stack_images.default] } as unknown as ImageInfo,
]);
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
apiPort: 10000,
} as ExtensionConfiguration);
await llamaStackManager.requestcreateLlamaStackContainerss({});
const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 3);
expect(tasks.some(task => task.state === 'error')).toBeTruthy();
});
test('requestcreateLlamaStackContainerss returns no error if createContainer returns id and container becomes healthy', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{
RepoTags: [llama_stack_images.default, llama_stack_playground_images.default],
Id: 'imageId',
engineId: 'engine1',
} as unknown as ImageInfo,
]);
vi.mocked(containerEngine.createContainer).mockResolvedValue({
id: 'containerId',
} as unknown as ContainerCreateResult);
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
apiPort: 10000,
} as ExtensionConfiguration);
vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678);
vi.mocked(containerEngine.pullImage).mockResolvedValue();
vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([]);
vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' });
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => {
// Fire the callback shortly after registration to simulate the container becoming healthy
setTimeout(() => cb({ id: 'containerId' }), 100);
return NO_OP_DISPOSABLE;
});
await llamaStackManager.requestcreateLlamaStackContainerss({});
const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 4);
expect(tasks.some(task => task.state === 'error')).toBeFalsy();
});
test('requestcreateLlamaStackContainerss registers all local models', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{ RepoTags: [llama_stack_images.default, llama_stack_playground_images.default] } as unknown as ImageInfo,
]);
vi.mocked(containerEngine.createContainer).mockResolvedValue({
id: 'containerId',
} as unknown as ContainerCreateResult);
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
apiPort: 10000,
} as ExtensionConfiguration);
vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678);
vi.mocked(containerEngine.pullImage).mockResolvedValue();
vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' });
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => {
setTimeout(() => cb({ id: 'containerId' }), 100);
return NO_OP_DISPOSABLE;
});
vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([
{
id: 'model1',
name: 'Model 1',
description: '',
file: { file: 'model1', path: '/path/to' },
},
{
id: 'model2',
name: 'Model 2',
description: '',
file: { file: 'model2', path: '/path/to' },
},
{
id: 'model3',
name: 'Model 3',
description: '',
},
]);
await llamaStackManager.requestcreateLlamaStackContainerss({});
const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 6);
expect(tasks.some(task => task.state === 'error')).toBeFalsy();
await vi.waitFor(() => {
expect(podmanConnection.execute).toHaveBeenCalledTimes(2);
});
expect(podmanConnection.execute).toHaveBeenCalledWith(expect.anything(), [
'exec',
'containerId',
'llama-stack-client',
'models',
'register',
'Model 1',
'--provider-id',
'podman-ai-lab',
]);
expect(podmanConnection.execute).toHaveBeenCalledWith(expect.anything(), [
'exec',
'containerId',
'llama-stack-client',
'models',
'register',
'Model 2',
'--provider-id',
'podman-ai-lab',
]);
});
test('requestcreateLlamaStackContainerss creates playground container', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{ RepoTags: [llama_stack_images.default, llama_stack_playground_images.default] } as unknown as ImageInfo,
]);
vi.mocked(containerEngine.createContainer).mockResolvedValue({
id: 'containerId',
} as unknown as ContainerCreateResult);
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
apiPort: 10000,
} as ExtensionConfiguration);
vi.mocked(utilsPorts.getFreeRandomPort).mockResolvedValueOnce(1234).mockResolvedValueOnce(5678);
vi.mocked(containerEngine.pullImage).mockResolvedValue();
vi.mocked(podmanConnection.execute).mockResolvedValue({ stdout: '', stderr: '', command: '' });
vi.mocked(containerRegistry.onHealthyContainerEvent).mockImplementation(cb => {
setTimeout(() => cb({ id: 'containerId' }), 100);
return NO_OP_DISPOSABLE;
});
vi.mocked(modelsManagerMock.getModelsInfo).mockReturnValue([
{
id: 'model1',
name: 'Model 1',
description: '',
file: { file: 'model1', path: '/path/to' },
},
{
id: 'model2',
name: 'Model 2',
description: '',
file: { file: 'model2', path: '/path/to' },
},
{
id: 'model3',
name: 'Model 3',
description: '',
},
]);
await llamaStackManager.requestcreateLlamaStackContainerss({});
const tasks = await waitTasks(LLAMA_STACK_CONTAINER_TRACKINGID, 7);
expect(tasks.some(task => task.state === 'error')).toBeFalsy();
expect(containerEngine.createContainer).toHaveBeenCalledTimes(2);
expect(containerEngine.createContainer).toHaveBeenNthCalledWith(
2,
undefined,
expect.objectContaining({
Env: ['LLAMA_STACK_ENDPOINT=http://host.containers.internal:1234'],
HostConfig: expect.objectContaining({
PortBindings: {
'8501/tcp': [
{
HostPort: '5678',
},
],
},
}),
}),
);
});
test('requestcreateLlamaStackContainerss starts both if server and playground exist', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
const server = { Id: 'serverId', Labels: { [LLAMA_STACK_API_PORT_LABEL]: '50000' } } as unknown as ContainerInfo;
const playground = {
Id: 'playgroundId',
Labels: { [LLAMA_STACK_PLAYGROUND_PORT_LABEL]: '60000' },
} as unknown as ContainerInfo;
vi.mocked(containerEngine.listContainers).mockResolvedValue([server, playground]);
const startBothSpy = vi
.spyOn(llamaStackManager as unknown as { startBoth: () => Promise<void> }, 'startBoth')
.mockResolvedValue(undefined);
await llamaStackManager.requestcreateLlamaStackContainerss({});
expect(startBothSpy).toHaveBeenCalledWith(server, playground, expect.any(Object));
});
test('requestcreateLlamaStackContainerss creates playground if server exists but playground missing', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
const server = { Id: 'serverId', Labels: { [LLAMA_STACK_API_PORT_LABEL]: '50000' } } as unknown as ContainerInfo;
vi.mocked(containerEngine.listContainers).mockResolvedValue([server]);
const createPlaygroundSpy = vi
.spyOn(
llamaStackManager as unknown as { createPlaygroundFromServer: () => Promise<void> },
'createPlaygroundFromServer',
)
.mockResolvedValue(undefined);
await llamaStackManager.requestcreateLlamaStackContainerss({});
expect(createPlaygroundSpy).toHaveBeenCalledWith(server, expect.any(Object), expect.anything());
});
test('requestcreateLlamaStackContainerss deletes existing playground and creates both if server missing', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
const playground = {
Id: 'playgroundId',
Labels: { [LLAMA_STACK_PLAYGROUND_PORT_LABEL]: '60000' },
} as unknown as ContainerInfo;
vi.mocked(containerEngine.listContainers).mockResolvedValue([playground]);
const createBothSpy = vi
.spyOn(llamaStackManager as unknown as { createBoth: () => Promise<void> }, 'createBoth')
.mockResolvedValue(undefined);
await llamaStackManager.requestcreateLlamaStackContainerss({});
expect(createBothSpy).toHaveBeenCalledWith(playground, expect.any(Object), expect.anything());
});
test('requestcreateLlamaStackContainerss creates both if server and playground missing', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(podmanConnection.findRunningContainerProviderConnection).mockReturnValue({
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
type: 'podman',
status: () => 'started',
endpoint: {
socketPath: 'socket.sock',
},
});
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
const createBothSpy = vi
.spyOn(llamaStackManager as unknown as { createBoth: () => Promise<void> }, 'createBoth')
.mockResolvedValue(undefined);
await llamaStackManager.requestcreateLlamaStackContainerss({});
expect(createBothSpy).toHaveBeenCalledWith(undefined, expect.any(Object), expect.anything());
});
test('onPodmanConnectionEvent start event should call refreshLlamaStackContainers and set containerInfo', async () => {
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers');
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]);
vi.mocked(podmanConnection.onPodmanConnectionEvent).mockImplementation(f => {
f({
status: 'started',
});
return NO_OP_DISPOSABLE;
});
llamaStackManager.init();
expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith();
await vi.waitFor(() => {
expect(llamaStackManager.getContainersInfo()).toEqual({
server: { containerId: 'dummyId', port: 50000, state: 'running' },
playground: undefined,
});
});
});
test('onPodmanConnectionEvent stop event should call refreshLlamaStackContainers and clear containerInfo', async () => {
vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers');
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]);
vi.mocked(podmanConnection.onPodmanConnectionEvent).mockReturnValue(NO_OP_DISPOSABLE);
llamaStackManager.init();
const listener = vi.mocked(podmanConnection.onPodmanConnectionEvent).mock.calls[0][0];
assert(listener, 'onPodmanConnectionEvent should have been called');
listener({ status: 'started' });
expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith();
await vi.waitFor(() => {
expect(llamaStackManager.getContainersInfo()).toEqual({
server: { containerId: 'dummyId', port: 50000, state: 'running' },
playground: undefined,
});
});
vi.mocked(llamaStackManager.refreshLlamaStackContainers).mockClear();
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_STOPPED]);
listener({ status: 'stopped' });
expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith();
await vi.waitFor(async () => {
expect(llamaStackManager.getContainersInfo()).toEqual({ server: undefined, playground: undefined });
});
});
test('onStartContainerEvent event should call refreshLlamaStackContainers and set containerInfo', async () => {
vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers');
vi.mocked(containerEngine.listContainers).mockResolvedValue([]);
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]);
vi.mocked(containerRegistry.onStartContainerEvent).mockImplementation(f => {
f({
id: 'dummyId',
});
return NO_OP_DISPOSABLE;
});
llamaStackManager.init();
expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith();
await vi.waitFor(() => {
expect(llamaStackManager.getContainersInfo()).toEqual({
server: { containerId: 'dummyId', port: 50000, state: 'running' },
playground: undefined,
});
});
});
test('onStopContainerEvent event should call refreshLlamaStackContainers and clear containerInfo', async () => {
vi.spyOn(llamaStackManager, 'refreshLlamaStackContainers');
vi.spyOn(taskRegistry, 'deleteByLabels');
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_RUNNING]);
vi.mocked(containerRegistry.onStartContainerEvent).mockImplementation(f => {
f({
id: 'dummyId',
});
return NO_OP_DISPOSABLE;
});
vi.mocked(containerRegistry.onStopContainerEvent).mockReturnValue(NO_OP_DISPOSABLE);
llamaStackManager.init();
expect(llamaStackManager.refreshLlamaStackContainers).toHaveBeenCalledWith();
await vi.waitFor(() => {
expect(llamaStackManager.getContainersInfo()).toEqual({
server: { containerId: 'dummyId', port: 50000, state: 'running' },
playground: undefined,
});
});
vi.mocked(llamaStackManager.refreshLlamaStackContainers).mockClear();
vi.mocked(containerEngine.listContainers).mockResolvedValueOnce([LLAMA_STACK_CONTAINER_STOPPED]);
const listener = vi.mocked(containerRegistry.onStopContainerEvent).mock.calls[0][0];
assert(listener, 'onStopContainerEvent should have been called');
listener({ id: 'dummyId' });
expect(taskRegistry.deleteByLabels).toHaveBeenCalled();
await vi.waitFor(async () => {
expect(llamaStackManager.getContainersInfo()).toBeUndefined();
});
});
================================================
FILE: packages/backend/src/managers/llama-stack/llamaStackManager.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { TaskRegistry } from '../../registries/TaskRegistry';
import {
containerEngine,
env,
process,
type ContainerInfo,
type Disposable,
type TelemetryLogger,
type ContainerProviderConnection,
type ContainerCreateOptions,
type ImageInfo,
} from '@podman-desktop/api';
import type { PodmanConnection, PodmanConnectionEvent } from '../podmanConnection';
import llama_stack_images from '../../assets/llama-stack-images.json';
import llama_stack_playground_images from '../../assets/llama-stack-playground-images.json';
import { getImageInfo } from '../../utils/inferenceUtils';
import type { ContainerRegistry, ContainerEvent, ContainerHealthy } from '../../registries/ContainerRegistry';
import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
import { getRandomName } from '../../utils/randomUtils';
import type { LlamaStackContainerInfo, LlamaStackContainers } from '@shared/models/llama-stack/LlamaStackContainerInfo';
import { LLAMA_STACK_CONTAINER_TRACKINGID } from '@shared/models/llama-stack/LlamaStackContainerInfo';
import type { LlamaStackContainerConfiguration } from '@shared/models/llama-stack/LlamaStackContainerConfiguration';
import path from 'node:path';
import fs from 'node:fs/promises';
import type { ConfigurationRegistry } from '../../registries/ConfigurationRegistry';
import { getFreeRandomPort } from '../../utils/ports';
import { TaskRunner } from '../TaskRunner';
import type { ModelsManager } from '../modelsManager';
import { getPodmanCli, getPodmanMachineName } from '../../utils/podman';
export const LLAMA_STACK_CONTAINER_LABEL = 'ai-lab-llama-stack-container';
export const LLAMA_STACK_API_PORT_LABEL = 'ai-lab-llama-stack-api-port';
export const LLAMA_STACK_PLAYGROUND_PORT_LABEL = 'ai-lab-llama-stack-playground-port';
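// Container engine health checks express intervals in nanoseconds, so one second is 1_000_000_000.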
export const SECOND: number = 1_000_000_000;
/*
* Get the local IP address of the Podman machine.
* See https://learn.microsoft.com/en-us/windows/wsl/networking
*/
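// Roughly equivalent to: podman machine ssh <machine> "ip route show | grep -i default | awk '{print $3}'"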
async function getLocalIPAddress(connection: ContainerProviderConnection): Promise<string> {
const cli = getPodmanCli();
const machineName = getPodmanMachineName(connection);
const result = await process.exec(cli, [
'machine',
'ssh',
machineName,
'ip',
'route',
'show',
'|',
'grep',
'-i',
'default',
'|',
'awk',
// eslint-disable-next-line quotes
"'{print $3}'",
]);
return result.stdout.trim();
}
export class LlamaStackManager implements Disposable {
#initialized: boolean;
#stack_containers: LlamaStackContainers | undefined;
#creationInProgress = false;
#disposables: Disposable[];
#taskRunner: TaskRunner;
constructor(
private readonly appUserDirectory: string,
private taskRegistry: TaskRegistry,
private podmanConnection: PodmanConnection,
private containerRegistry: ContainerRegistry,
private configurationRegistry: ConfigurationRegistry,
private telemetryLogger: TelemetryLogger,
private modelsManager: ModelsManager,
) {
this.#initialized = false;
this.#disposables = [];
this.#taskRunner = new TaskRunner(this.taskRegistry);
}
init(): void {
this.#disposables.push(this.podmanConnection.onPodmanConnectionEvent(this.watchMachineEvent.bind(this)));
this.#disposables.push(this.containerRegistry.onStartContainerEvent(this.onStartContainerEvent.bind(this)));
this.#disposables.push(this.containerRegistry.onStopContainerEvent(this.onStopContainerEvent.bind(this)));
}
dispose(): void {
this.#disposables.forEach(disposable => disposable.dispose());
this.#disposables = [];
}
private async watchMachineEvent(event: PodmanConnectionEvent): Promise<void> {
if (
(event.status === 'started' && (!this.#stack_containers?.server || !this.#stack_containers?.playground)) ||
(event.status === 'stopped' && (this.#stack_containers?.server || this.#stack_containers?.playground))
) {
await this.refreshLlamaStackContainers();
}
}
private async onStartContainerEvent(): Promise<void> {
await this.refreshLlamaStackContainers();
}
private async onStopContainerEvent(event: ContainerEvent): Promise<void> {
const serverId = this.#stack_containers?.server?.containerId;
const playgroundId = this.#stack_containers?.playground?.containerId;
if (this.#creationInProgress) return;
if (serverId === event.id || playgroundId === event.id) {
this.#stack_containers = undefined;
this.taskRegistry.deleteByLabels({ trackingId: LLAMA_STACK_CONTAINER_TRACKINGID });
}
await this.refreshLlamaStackContainers();
}
/**
* getLlamaStackContainers returns the Llama Stack server and playground containers.
* The containers are searched only the first time and the result is cached for subsequent calls.
*
* Returns undefined if no containers are found
*/
async getLlamaStackContainers(): Promise<LlamaStackContainers | undefined> {
if (!this.#initialized) {
await this.refreshLlamaStackContainers();
this.#initialized = true;
}
return this.#stack_containers;
}
/**
* refreshLlamaStackContainers refreshes the container info.
* It is called when the Podman machine or a tracked container is started or stopped.
*/
protected async refreshLlamaStackContainers(): Promise<void> {
const containers = await containerEngine.listContainers();
const serverContainer = containers.find(c => c.Labels && LLAMA_STACK_API_PORT_LABEL in c.Labels);
let serverInfo: LlamaStackContainerInfo | undefined;
if (serverContainer) {
serverInfo = {
containerId: serverContainer.Id,
port: parseInt(serverContainer.Labels[LLAMA_STACK_API_PORT_LABEL], 10),
state: serverContainer.State,
};
}
const playgroundContainer = containers.find(c => c.Labels && LLAMA_STACK_PLAYGROUND_PORT_LABEL in c.Labels);
let playgroundInfo: LlamaStackContainerInfo | undefined;
if (playgroundContainer) {
playgroundInfo = {
containerId: playgroundContainer.Id,
port: parseInt(playgroundContainer.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10),
state: playgroundContainer.State,
};
}
this.#stack_containers = {
server: serverInfo,
playground: playgroundInfo,
};
}
/**
* requestCreateLlamaStackContainers creates the Llama Stack containers.
* It is called when the user clicks the "Start" button.
*
* Flowchart for checking containers and handling them:
*
* Server exists
* ├─ Playground exists
* │ └─ Start both
* └─ Playground doesn't exist
* └─ Create new playground
*
* Server doesn't exist
* ├─ Playground exists
* │ └─ Delete playground and update state
* └─ Playground doesn't exist
* └─ Create both
*/
async requestCreateLlamaStackContainers(config: LlamaStackContainerConfiguration): Promise<void> {
const connection: ContainerProviderConnection | undefined = config.connection
? this.podmanConnection.getContainerProviderConnection(config.connection)
: this.podmanConnection.findRunningContainerProviderConnection();
if (!connection) throw new Error('Cannot find running container provider connection');
const labels = { trackingId: LLAMA_STACK_CONTAINER_TRACKINGID };
const containers = await containerEngine.listContainers();
const server = containers.find(c => c.Labels && LLAMA_STACK_API_PORT_LABEL in c.Labels);
const playground = containers.find(c => c.Labels && LLAMA_STACK_PLAYGROUND_PORT_LABEL in c.Labels);
try {
if (server) {
if (playground) {
await this.startBoth(server, playground, labels);
} else {
await this.createPlaygroundFromServer(server, labels, connection);
}
} else {
this.#creationInProgress = true;
await this.createBoth(playground, labels, connection);
this.#creationInProgress = false;
}
} catch (err) {
this.telemetryLogger.logError('llamaStack.startContainer', { error: err });
}
}
/**
* Helper: Both server and playground exist → start both
*/
private async startBoth(
server: ContainerInfo,
playground: ContainerInfo,
labels: { [p: string]: string },
): Promise<void> {
await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Starting Server and/or Playground',
errorMsg: err => `Failed to start existing containers: ${String(err)}`,
},
async ({ updateLabels }) => {
if (server.State !== 'running') await containerEngine.startContainer(server.engineId, server.Id);
if (playground.State !== 'running') await containerEngine.startContainer(playground.engineId, playground.Id);
const serverInfo = await this.waitLlamaStackServerHealthy(
{
containerId: server.Id,
port: parseInt(server.Labels[LLAMA_STACK_API_PORT_LABEL], 10),
state: server.State,
},
labels,
);
this.#stack_containers = {
server: serverInfo,
playground: {
containerId: playground.Id,
port: parseInt(playground.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10),
state: 'running',
},
};
updateLabels(l => ({
...l,
containerId: serverInfo.containerId,
port: `${serverInfo.port}`,
state: serverInfo.state,
playgroundId: playground.Id,
playgroundPort: `${parseInt(playground.Labels[LLAMA_STACK_PLAYGROUND_PORT_LABEL], 10)}`,
playgroundState: 'running',
}));
this.telemetryLogger.logUsage('llamaStack.startContainer');
},
);
}
/**
* Helper: Only server exists → create playground
*/
private async createPlaygroundFromServer(
server: ContainerInfo,
labels: { [p: string]: string },
connection: ContainerProviderConnection,
): Promise<void> {
await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Creating Playground container',
errorMsg: err => `Failed to create playground: ${String(err)}`,
},
async ({ updateLabels }) => {
if (server.State !== 'running') await containerEngine.startContainer(server.engineId, server.Id);
const serverInfo = await this.waitLlamaStackServerHealthy(
{
containerId: server.Id,
port: parseInt(server.Labels[LLAMA_STACK_API_PORT_LABEL], 10),
state: server.State,
},
labels,
);
const playgroundInfo = await this.createPlaygroundContainer(serverInfo, labels, connection);
this.#stack_containers = { server: serverInfo, playground: playgroundInfo };
updateLabels(l => ({
...l,
containerId: serverInfo.containerId,
port: `${serverInfo.port}`,
state: serverInfo.state,
playgroundId: playgroundInfo.containerId,
playgroundPort: `${playgroundInfo.port}`,
playgroundState: playgroundInfo.state,
}));
this.telemetryLogger.logUsage('llamaStack.startContainer');
},
);
}
/**
* Helper: Server doesn't exist → delete any leftover playground and create both containers
*/
private async createBoth(
playground: ContainerInfo | undefined,
labels: { [p: string]: string },
connection: ContainerProviderConnection,
): Promise<void> {
await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Creating Server and Playground',
errorMsg: err => `Failed to create Llama Stack containers: ${String(err)}`,
failFastSubtasks: true,
},
async ({ updateLabels }) => {
// If playground exists, stop & delete it
if (playground) {
if (playground.State === 'running') {
await containerEngine.stopContainer(playground.engineId, playground.Id);
}
await containerEngine.deleteContainer(playground.engineId, playground.Id);
}
// Create new server + playground
const stackInfo = await this.createLlamaStackContainers(connection, labels);
this.#stack_containers = stackInfo;
// Update task labels for UI
updateLabels(l => ({
...l,
containerId: stackInfo.server?.containerId ?? '',
port: `${stackInfo.server?.port}`,
state: stackInfo.server?.state ?? '',
playgroundId: stackInfo.playground?.containerId ?? '',
playgroundPort: `${stackInfo.playground?.port}`,
playgroundState: stackInfo.playground?.state ?? '',
}));
this.telemetryLogger.logUsage('llamaStack.startContainer');
},
);
}
async createLlamaStackContainers(
connection: ContainerProviderConnection,
labels: { [p: string]: string },
): Promise<LlamaStackContainers> {
const image = llama_stack_images.default;
const imageInfo = await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: `Pulling ${image}.`,
errorMsg: err => `Something went wrong while pulling ${image}: ${String(err)}`,
},
() => getImageInfo(connection, image, () => {}),
);
// Create the server container
let serverInfo = await this.createServerContainer(connection, image, imageInfo, labels);
serverInfo = await this.waitLlamaStackServerHealthy(serverInfo, labels);
serverInfo = await this.registerModels(serverInfo, labels, connection);
const playgroundInfo = await this.createPlaygroundContainer(serverInfo, labels, connection);
// Return both in proper interface
return {
server: serverInfo,
playground: playgroundInfo,
};
}
private async createServerContainer(
connection: ContainerProviderConnection,
image: string,
imageInfo: ImageInfo,
labels: { [p: string]: string },
): Promise<LlamaStackContainerInfo> {
const folder = await this.getLlamaStackContainersFolder();
const aiLabApiHost =
env.isWindows && connection.vmType === 'wsl' ? await getLocalIPAddress(connection) : 'host.docker.internal';
const aiLabApiPort = this.configurationRegistry.getExtensionConfiguration().apiPort;
const llamaStackApiPort = await getFreeRandomPort('0.0.0.0');
const createContainerOptions: ContainerCreateOptions = {
Image: imageInfo.Id,
name: getRandomName('llama-stack'),
Labels: {
[LLAMA_STACK_CONTAINER_LABEL]: image,
[LLAMA_STACK_API_PORT_LABEL]: `${llamaStackApiPort}`,
},
HostConfig: {
AutoRemove: false,
SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION],
Mounts: [
{
Target: '/app/.llama',
Source: path.join(folder, '.llama'),
Type: 'bind',
},
],
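// keep-id maps the host user to root inside the container so the bind-mounted .llama folder stays writable with rootless Podman.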
UsernsMode: 'keep-id:uid=0,gid=0',
PortBindings: { '8321/tcp': [{ HostPort: `${llamaStackApiPort}` }] },
},
Env: [`PODMAN_AI_LAB_URL=http://${aiLabApiHost}:${aiLabApiPort}`],
OpenStdin: true,
start: true,
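// Health check: curl the Llama Stack /v1/models endpoint every 5 seconds; the container is reported healthy once the endpoint answers.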
HealthCheck: {
Test: ['CMD-SHELL', `curl -sSf localhost:8321/v1/models > /dev/null`],
Interval: SECOND * 5,
Retries: 20,
},
};
return this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Starting Llama Stack server container',
errorMsg: err => `Something went wrong while creating server container: ${String(err)}`,
},
async () => {
const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions);
return {
containerId: id,
port: llamaStackApiPort,
state: 'starting',
};
},
);
}
async waitLlamaStackServerHealthy(
serverInfo: LlamaStackContainerInfo,
labels: { [p: string]: string },
): Promise<LlamaStackContainerInfo> {
return this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Waiting for Llama Stack server to be healthy',
errorMsg: err => `Something went wrong while checking server health: ${String(err)}`,
},
() =>
new Promise<LlamaStackContainerInfo>((resolve, _reject) => {
const disposable = this.containerRegistry.onHealthyContainerEvent((event: ContainerHealthy) => {
if (event.id !== serverInfo.containerId) return;
disposable.dispose();
serverInfo.state = 'running';
this.telemetryLogger.logUsage('llamaStack.startContainer');
resolve(serverInfo);
});
}),
);
}
async registerModels(
serverInfo: LlamaStackContainerInfo,
labels: { [p: string]: string },
connection: ContainerProviderConnection,
): Promise<LlamaStackContainerInfo> {
for (const model of this.modelsManager.getModelsInfo().filter(model => model.file)) {
await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: `Registering model ${model.name}`,
errorMsg: err => `Something went wrong while registering model: ${String(err)}`,
},
async () => {
await this.podmanConnection.execute(connection, [
'exec',
serverInfo.containerId,
'llama-stack-client',
'models',
'register',
model.name,
'--provider-id',
'podman-ai-lab',
]);
},
);
}
return serverInfo;
}
private async createPlaygroundContainer(
serverInfo: LlamaStackContainerInfo,
labels: { [p: string]: string },
connection: ContainerProviderConnection,
): Promise<LlamaStackContainerInfo> {
const image = llama_stack_playground_images.default;
const imageInfo = await this.#taskRunner.runAsTask(
labels,
{
loadingLabel: `Pulling ${image}.`,
errorMsg: err => `Something went wrong while pulling ${image}: ${String(err)}`,
},
() => getImageInfo(connection, image, () => {}),
);
const playgroundPort = await getFreeRandomPort('0.0.0.0');
const createContainerOptions: ContainerCreateOptions = {
Image: imageInfo.Id,
name: getRandomName('llama-stack-playground'),
Labels: {
[LLAMA_STACK_CONTAINER_LABEL]: image,
[LLAMA_STACK_PLAYGROUND_PORT_LABEL]: `${playgroundPort}`,
},
HostConfig: {
AutoRemove: false,
PortBindings: { '8501/tcp': [{ HostPort: `${playgroundPort}` }] },
},
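// The playground reaches the server through the port published on the host, via host.containers.internal.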
Env: [`LLAMA_STACK_ENDPOINT=http://host.containers.internal:${serverInfo.port}`],
OpenStdin: true,
start: true,
};
return this.#taskRunner.runAsTask(
labels,
{
loadingLabel: 'Starting Llama Stack Playground container',
errorMsg: err => `Something went wrong while creating playground container: ${String(err)}`,
},
async () => {
const { id } = await containerEngine.createContainer(imageInfo.engineId, createContainerOptions);
return {
containerId: id,
port: playgroundPort,
state: 'running',
};
},
);
}
private async getLlamaStackContainersFolder(): Promise<string> {
const llamaStackPath = path.join(this.appUserDirectory, 'llama-stack', 'container');
await fs.mkdir(path.join(llamaStackPath, '.llama'), { recursive: true });
return llamaStackPath;
}
// For tests only
protected getContainersInfo(): LlamaStackContainers | undefined {
return this.#stack_containers;
}
}
================================================
FILE: packages/backend/src/managers/modelsManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { type MockInstance, beforeEach, describe, expect, test, vi } from 'vitest';
import os from 'node:os';
import fs, { type Stats, type PathLike } from 'node:fs';
import path from 'node:path';
import { ModelsManager } from './modelsManager';
import { env, process as coreProcess } from '@podman-desktop/api';
import type { RunResult, TelemetryLogger, ContainerProviderConnection } from '@podman-desktop/api';
import type { CatalogManager } from './catalogManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import * as utils from '../utils/utils';
import { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import * as sha from '../utils/sha';
import type { GGUFParseOutput } from '@huggingface/gguf';
import { gguf } from '@huggingface/gguf';
import type { PodmanConnection } from './podmanConnection';
import { VMType } from '@shared/models/IPodman';
import { getPodmanMachineName } from '../utils/podman';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { Uploader } from '../utils/uploader';
import { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry';
import { URLModelHandler } from '../models/URLModelHandler';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_NEW_MODELS_STATE } from '@shared/Messages';
const mocks = vi.hoisted(() => {
return {
showErrorMessageMock: vi.fn(),
logUsageMock: vi.fn(),
logErrorMock: vi.fn(),
performDownloadMock: vi.fn(),
onEventDownloadMock: vi.fn(),
getTargetMock: vi.fn(),
getDownloaderCompleter: vi.fn(),
isCompletionEventMock: vi.fn(),
getPodmanCliMock: vi.fn(),
};
});
vi.mock('../utils/uploader', () => ({
Uploader: vi.fn(),
}));
vi.mock('@huggingface/gguf', () => ({
gguf: vi.fn(),
}));
vi.mock('../utils/podman', () => ({
getPodmanCli: mocks.getPodmanCliMock,
getPodmanMachineName: vi.fn(),
}));
vi.mock('@podman-desktop/api', () => {
return {
Disposable: {
create: vi.fn(),
},
env: {
isWindows: false,
},
process: {
exec: vi.fn(),
},
fs: {
createFileSystemWatcher: (): unknown => ({
onDidCreate: vi.fn(),
onDidDelete: vi.fn(),
onDidChange: vi.fn(),
}),
},
window: {
showErrorMessage: mocks.showErrorMessageMock,
},
EventEmitter: vi.fn(),
};
});
vi.mock('../utils/downloader', () => ({
isCompletionEvent: mocks.isCompletionEventMock,
Downloader: class {
get completed(): boolean {
return mocks.getDownloaderCompleter();
}
onEvent = mocks.onEventDownloadMock;
perform = mocks.performDownloadMock;
getTarget = mocks.getTargetMock;
},
}));
const podmanConnectionMock = {
getContainerProviderConnections: vi.fn(),
} as unknown as PodmanConnection;
const cancellationTokenRegistryMock = {
createCancellationTokenSource: vi.fn(),
} as unknown as CancellationTokenRegistry;
let taskRegistry: TaskRegistry;
const telemetryLogger = {
logUsage: mocks.logUsageMock,
logError: mocks.logErrorMock,
} as unknown as TelemetryLogger;
const configurationRegistryMock: ConfigurationRegistry = {
getExtensionConfiguration: vi.fn(),
} as unknown as ConfigurationRegistry;
let modelHandlerRegistry: ModelHandlerRegistry;
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
taskRegistry = new TaskRegistry(rpcExtensionMock);
modelHandlerRegistry = new ModelHandlerRegistry(rpcExtensionMock);
vi.mocked(configurationRegistryMock.getExtensionConfiguration).mockReturnValue({
modelUploadDisabled: false,
modelsPath: '~/downloads',
experimentalTuning: false,
apiPort: 0,
inferenceRuntime: 'llama-cpp',
experimentalGPU: false,
showGPUPromotion: false,
appearance: 'dark',
});
mocks.isCompletionEventMock.mockReturnValue(true);
});
const dirent = [
{
isDirectory: (): boolean => true,
parentPath: '/home/user/appstudio-dir',
name: 'model-id-1',
},
{
isDirectory: (): boolean => true,
parentPath: '/home/user/appstudio-dir',
name: 'model-id-2',
},
{
isDirectory: (): boolean => false,
parentPath: '/home/user/appstudio-dir',
name: 'other-file-should-be-ignored.txt',
},
] as fs.Dirent[];
function mockFiles(now: Date): void {
vi.spyOn(os, 'homedir').mockReturnValue('/home/user');
const existsSyncSpy = vi.spyOn(fs, 'existsSync');
existsSyncSpy.mockImplementation((path: PathLike) => {
if (process.platform === 'win32') {
expect(path).toBe('C:\\home\\user\\aistudio\\models');
} else {
expect(path).toBe('/home/user/aistudio/models');
}
return true;
});
const statSpy = vi.spyOn(fs.promises, 'stat');
const info: Stats = {} as Stats;
info.size = 32000;
info.mtime = now;
statSpy.mockResolvedValue(info);
const readdirMock = vi.spyOn(fs.promises, 'readdir') as unknown as MockInstance<
(path: string) => Promise<string[] | fs.Dirent[]>
>;
readdirMock.mockImplementation((dir: string) => {
if (dir.endsWith('model-id-1') || dir.endsWith('model-id-2')) {
const base = path.basename(dir);
return Promise.resolve([base + '-model']);
} else {
return Promise.resolve(dirent);
}
});
}
test('getModelsInfo should get models in local directory', async () => {
const now = new Date();
mockFiles(now);
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [
{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo,
{ id: 'model-id-2', name: 'model-id-2-model' } as ModelInfo,
];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
expect(manager.getModelsInfo()).toEqual([
{
id: 'model-id-1',
name: 'model-id-1-model',
file: {
size: 32000,
creation: now,
path: path.resolve(dirent[0].parentPath, dirent[0].name),
file: 'model-id-1-model',
},
},
{
id: 'model-id-2',
name: 'model-id-2-model',
file: {
size: 32000,
creation: now,
path: path.resolve(dirent[1].parentPath, dirent[1].name),
file: 'model-id-2-model',
},
},
]);
});
test('getModelsInfo should return an empty array if the models folder does not exist', async () => {
vi.spyOn(os, 'homedir').mockReturnValue('/home/user');
const existsSyncSpy = vi.spyOn(fs, 'existsSync');
existsSyncSpy.mockReturnValue(false);
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.getLocalModelsFromDisk();
expect(manager.getModelsInfo()).toEqual([]);
if (process.platform === 'win32') {
expect(existsSyncSpy).toHaveBeenCalledWith('C:\\home\\user\\aistudio\\models');
} else {
expect(existsSyncSpy).toHaveBeenCalledWith('/home/user/aistudio/models');
}
});
test('getLocalModelsFromDisk should return undefined Date and size when stat fail', async () => {
const now = new Date();
mockFiles(now);
const statSpy = vi.spyOn(fs.promises, 'stat') as unknown as MockInstance<(path: PathLike) => Promise<Stats>>;
statSpy.mockImplementation((path: PathLike) => {
if (`${path}`.endsWith('model-id-1')) throw new Error('random-error');
return Promise.resolve({ isDirectory: () => true } as Stats);
});
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
expect(manager.getModelsInfo()).toEqual([
{
id: 'model-id-1',
name: 'model-id-1-model',
file: {
size: undefined,
creation: undefined,
path: path.resolve(dirent[0].parentPath, dirent[0].name),
file: 'model-id-1-model',
},
},
]);
});
test('getLocalModelsFromDisk should skip folders containing tmp files', async () => {
const now = new Date();
mockFiles(now);
const statSpy = vi.spyOn(fs.promises, 'stat') as unknown as MockInstance<(path: PathLike) => Promise<Stats>>;
statSpy.mockImplementation((path: PathLike) => {
if (`${path}`.endsWith('model-id-1')) throw new Error('random-error');
return Promise.resolve({ isDirectory: () => true } as Stats);
});
const readdirMock = vi.spyOn(fs.promises, 'readdir') as unknown as MockInstance<
(path: string) => Promise<string[] | fs.Dirent[]>
>;
readdirMock.mockImplementation((dir: string) => {
if (dir.endsWith('model-id-1') || dir.endsWith('model-id-2')) {
const base = path.basename(dir);
return Promise.resolve([base + '-model.tmp']);
} else {
return Promise.resolve(dirent);
}
});
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
expect(manager.getModelsInfo()).toEqual([
{
id: 'model-id-1',
name: 'model-id-1-model',
},
]);
});
test('loadLocalModels should post a message with the message on disk and on catalog', async () => {
const now = new Date();
mockFiles(now);
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: () => {
return [
{
id: 'model-id-1',
},
] as ModelInfo[];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(2, MSG_NEW_MODELS_STATE, [
{
file: {
creation: now,
file: 'model-id-1-model',
size: 32000,
path: path.resolve(dirent[0].parentPath, dirent[0].name),
},
id: 'model-id-1',
},
]);
});
test('deleteModel deletes the model folder', async () => {
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const now = new Date();
mockFiles(now);
const rmSpy = vi.spyOn(fs.promises, 'rm');
rmSpy.mockResolvedValue();
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: () => {
return [
{
id: 'model-id-1',
url: 'https:///model-url',
},
] as ModelInfo[];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
await manager.deleteModel('model-id-1');
// check that the model's folder is removed from disk
if (process.platform === 'win32') {
expect(rmSpy).toBeCalledWith('C:\\home\\user\\aistudio\\models\\model-id-1', {
recursive: true,
force: true,
maxRetries: 3,
});
} else {
expect(rmSpy).toBeCalledWith('/home/user/aistudio/models/model-id-1', {
recursive: true,
force: true,
maxRetries: 3,
});
}
expect(rpcExtensionMock.fire).toHaveBeenCalledTimes(5);
// check that a new state is sent with the model removed
expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(4, MSG_NEW_MODELS_STATE, [
{
id: 'model-id-1',
url: 'https:///model-url',
},
]);
expect(mocks.logUsageMock).toHaveBeenNthCalledWith(1, 'model.delete', { 'model.id': expect.any(String) });
});
describe('deleting models', () => {
test('deleteModel fails to delete the model folder', async () => {
let modelsDir: string;
if (process.platform === 'win32') {
modelsDir = 'C:\\home\\user\\aistudio\\models';
} else {
modelsDir = '/home/user/aistudio/models';
}
const now = new Date();
mockFiles(now);
const rmSpy = vi.spyOn(fs.promises, 'rm');
rmSpy.mockRejectedValue(new Error('failed'));
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: () => {
return [
{
id: 'model-id-1',
url: 'https://model-url',
},
] as ModelInfo[];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, modelsDir));
await manager.init();
await manager.loadLocalModels();
await manager.deleteModel('model-id-1');
// check that the model's folder is removed from disk
if (process.platform === 'win32') {
expect(rmSpy).toBeCalledWith('C:\\home\\user\\aistudio\\models\\model-id-1', {
recursive: true,
force: true,
maxRetries: 3,
});
} else {
expect(rmSpy).toBeCalledWith('/home/user/aistudio/models/model-id-1', {
recursive: true,
force: true,
maxRetries: 3,
});
}
expect(rpcExtensionMock.fire).toHaveBeenCalledTimes(5);
// check that a new state is sent with the model not removed
expect(rpcExtensionMock.fire).toHaveBeenNthCalledWith(4, MSG_NEW_MODELS_STATE, [
{
id: 'model-id-1',
url: 'https://model-url',
file: {
creation: now,
file: 'model-id-1-model',
size: 32000,
path: path.resolve(dirent[0].parentPath, dirent[0].name),
},
},
]);
expect(mocks.showErrorMessageMock).toHaveBeenCalledOnce();
expect(mocks.logErrorMock).toHaveBeenCalled();
});
test('delete local model should call catalogManager', async () => {
vi.mocked(env).isWindows = false;
const removeUserModelMock = vi.fn();
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: () => {
return [
{
id: 'model-id-1',
file: {
file: 'model-id-1-model',
size: 32000,
path: path.resolve(dirent[0].parentPath, dirent[0].name),
},
},
] as ModelInfo[];
},
removeUserModel: removeUserModelMock,
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.loadLocalModels();
await manager.deleteModel('model-id-1');
expect(removeUserModelMock).toBeCalledWith('model-id-1');
});
test('deleting on windows should check for all connections', async () => {
vi.mocked(coreProcess.exec).mockResolvedValue({} as RunResult);
mocks.getPodmanCliMock.mockReturnValue('dummyCli');
vi.mocked(env).isWindows = true;
const connections: ContainerProviderConnection[] = [
{
name: 'Machine 1',
type: 'podman',
vmType: VMType.HYPERV,
endpoint: {
socketPath: '',
},
status: () => 'started',
},
{
name: 'Machine 2',
type: 'podman',
vmType: VMType.WSL,
endpoint: {
socketPath: '',
},
status: () => 'started',
},
];
vi.mocked(podmanConnectionMock.getContainerProviderConnections).mockReturnValue(connections);
vi.mocked(getPodmanMachineName).mockReturnValue('machine-2');
const rmSpy = vi.spyOn(fs.promises, 'rm');
rmSpy.mockResolvedValue(undefined);
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: () => {
return [
{
id: 'model-id-1',
url: 'model-url',
file: {
file: 'dummyFile',
path: 'dummyPath',
},
},
] as ModelInfo[];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.loadLocalModels();
// delete the model
await manager.deleteModel('model-id-1');
expect(podmanConnectionMock.getContainerProviderConnections).toHaveBeenCalledOnce();
expect(coreProcess.exec).toHaveBeenCalledWith('dummyCli', [
'machine',
'ssh',
'machine-2',
'rm',
'-f',
'/home/user/ai-lab/models/model-id-1',
]);
});
});
describe('downloadModel', () => {
test('download model if not already on disk', async () => {
vi.mocked(cancellationTokenRegistryMock.createCancellationTokenSource).mockReturnValue(99);
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir'));
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false);
vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99);
const updateTaskMock = vi.spyOn(taskRegistry, 'updateTask');
await manager.requestDownloadModel({
id: 'id',
url: 'https:///url',
name: 'name',
} as ModelInfo);
expect(cancellationTokenRegistryMock.createCancellationTokenSource).toHaveBeenCalled();
expect(updateTaskMock).toHaveBeenLastCalledWith({
id: expect.any(String),
name: 'Downloading model name',
labels: {
'model-pulling': 'id',
},
state: 'loading',
cancellationToken: 99,
});
});
test('retrieve model path if already on disk', async () => {
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
const updateTaskMock = vi.spyOn(taskRegistry, 'updateTask');
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true);
const getLocalModelPathMock = vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('');
await manager.requestDownloadModel({
id: 'id',
url: 'url',
name: 'name',
} as ModelInfo);
expect(getLocalModelPathMock).toBeCalledWith('id');
expect(updateTaskMock).toHaveBeenLastCalledWith({
id: expect.any(String),
name: 'Model name already present on disk',
labels: {
'model-pulling': 'id',
},
state: 'success',
});
});
test('fail if model on disk has different sha of the expected value', async () => {
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
vi.spyOn(taskRegistry, 'updateTask');
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true);
vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('path');
vi.spyOn(sha, 'hasValidSha').mockResolvedValue(false);
await expect(() =>
manager.requestDownloadModel({
id: 'id',
url: 'url',
name: 'name',
sha256: 'sha',
} as ModelInfo),
).rejects.toThrowError(
'Model name is already present on disk at path but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.',
);
});
test('multiple download request same model - second call after first completed', async () => {
mocks.getDownloaderCompleter.mockReturnValue(true);
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir'));
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false);
vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99);
await manager.requestDownloadModel({
id: 'id',
url: 'https:///url',
name: 'name',
} as ModelInfo);
await manager.requestDownloadModel({
id: 'id',
url: 'https:///url',
name: 'name',
} as ModelInfo);
// Only called once
expect(mocks.performDownloadMock).toHaveBeenCalledTimes(1);
expect(mocks.onEventDownloadMock).toHaveBeenCalledTimes(1);
});
test('multiple download request same model - second call before first completed', async () => {
mocks.getDownloaderCompleter.mockReturnValue(false);
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
modelHandlerRegistry.register(new URLModelHandler(manager, 'appdir'));
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(false);
vi.spyOn(utils, 'getDurationSecondsSince').mockReturnValue(99);
mocks.onEventDownloadMock.mockImplementation(listener => {
setTimeout(() => {
listener({
id: 'id',
status: 'completed',
duration: 1000,
});
}, 1000);
return {
dispose: vi.fn(),
};
});
await manager.requestDownloadModel({
id: 'id',
url: 'https:///url',
name: 'name',
} as ModelInfo);
await manager.requestDownloadModel({
id: 'id',
url: 'https:///url',
name: 'name',
} as ModelInfo);
// Only called once
expect(mocks.performDownloadMock).toHaveBeenCalledTimes(1);
expect(mocks.onEventDownloadMock).toHaveBeenCalledTimes(2);
});
});
describe('getModelMetadata', () => {
test('unknown model', async () => {
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: (): ModelInfo[] => [],
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await expect(() => manager.getModelMetadata('unknown-model-id')).rejects.toThrowError(
'model with id unknown-model-id does not exists.',
);
});
test('remote model', async () => {
const manager = new ModelsManager(
{} as RpcExtension,
{
getModels: (): ModelInfo[] => [
{
id: 'test-model-id',
url: 'dummy-url',
file: undefined,
} as unknown as ModelInfo,
],
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.init();
const fakeMetadata: Record<string, unknown> = {
hello: 'world',
};
vi.mocked(gguf).mockResolvedValue({
metadata: fakeMetadata,
} as unknown as GGUFParseOutput & { parameterCount: number });
const result = await manager.getModelMetadata('test-model-id');
expect(result).toStrictEqual(fakeMetadata);
expect(gguf).toHaveBeenCalledWith('dummy-url');
});
test('local model', async () => {
const manager = new ModelsManager(
rpcExtensionMock,
{
getModels: (): ModelInfo[] => [
{
id: 'test-model-id',
url: 'dummy-url',
file: {
file: 'random',
path: 'dummy-path',
},
} as unknown as ModelInfo,
],
onUpdate: vi.fn(),
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.init();
const fakeMetadata: Record<string, unknown> = {
hello: 'world',
};
vi.mocked(gguf).mockResolvedValue({
metadata: fakeMetadata,
} as unknown as GGUFParseOutput & { parameterCount: number });
const result = await manager.getModelMetadata('test-model-id');
expect(result).toStrictEqual(fakeMetadata);
expect(gguf).toHaveBeenCalledWith(path.join('dummy-path', 'random'), {
allowLocalFile: true,
});
});
});
const connectionMock: ContainerProviderConnection = {
name: 'dummy-connection',
type: 'podman',
vmType: undefined,
} as unknown as ContainerProviderConnection;
const modelMock: ModelInfo = {
id: 'test-model-id',
url: 'dummy-url',
file: {
file: 'random',
path: 'dummy-path',
},
} as unknown as ModelInfo;
describe('uploadModelToPodmanMachine', () => {
test('uploader should be used', async () => {
const performMock = vi.fn().mockResolvedValue('uploader-result');
vi.mocked(Uploader).mockReturnValue({
onEvent: vi.fn(),
perform: performMock,
} as unknown as Uploader);
const manager = new ModelsManager(
rpcExtensionMock,
{
onUpdate: vi.fn(),
getModels: () => [],
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.init();
const result = await manager.uploadModelToPodmanMachine(connectionMock, modelMock);
expect(result).toBe('uploader-result');
expect(performMock).toHaveBeenCalledWith(modelMock.id);
});
test('upload should be skipped when configuration disable it', async () => {
vi.mocked(configurationRegistryMock.getExtensionConfiguration).mockReturnValue({
// disable upload
modelUploadDisabled: true,
modelsPath: '~/downloads',
experimentalTuning: false,
apiPort: 0,
inferenceRuntime: 'llama-cpp',
experimentalGPU: false,
showGPUPromotion: false,
appearance: 'dark',
});
const manager = new ModelsManager(
rpcExtensionMock,
{
onUpdate: vi.fn(),
getModels: () => [],
} as unknown as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
await manager.init();
await manager.uploadModelToPodmanMachine(connectionMock, modelMock);
expect(Uploader).not.toHaveBeenCalled();
});
});
================================================
FILE: packages/backend/src/managers/modelsManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { LocalModelInfo } from '@shared/models/ILocalModelInfo';
import fs from 'node:fs';
import * as path from 'node:path';
import { type Disposable, env, type ContainerProviderConnection } from '@podman-desktop/api';
import { MSG_NEW_MODELS_STATE } from '@shared/Messages';
import type { CatalogManager } from './catalogManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import * as podmanDesktopApi from '@podman-desktop/api';
import type { Downloader } from '../utils/downloader';
import type { TaskRegistry } from '../registries/TaskRegistry';
import type { Task } from '@shared/models/ITask';
import type { BaseEvent } from '../models/baseEvent';
import { isCompletionEvent, isProgressEvent } from '../models/baseEvent';
import { Uploader } from '../utils/uploader';
import { deleteRemoteModel, getLocalModelFile, isModelUploaded } from '../utils/modelsUtils';
import { getPodmanMachineName } from '../utils/podman';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import { getHash, hasValidSha } from '../utils/sha';
import type { GGUFParseOutput } from '@huggingface/gguf';
import { gguf } from '@huggingface/gguf';
import type { PodmanConnection } from './podmanConnection';
import { VMType } from '@shared/models/IPodman';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import type { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
export class ModelsManager implements Disposable {
#models: Map<string, ModelInfo>;
#disposables: Disposable[];
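// One Downloader per model id, so concurrent download requests for the same model share a single download.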
#downloaders: Map<string, Downloader> = new Map();
constructor(
private rpcExtension: RpcExtension,
private catalogManager: CatalogManager,
private telemetry: podmanDesktopApi.TelemetryLogger,
private taskRegistry: TaskRegistry,
private cancellationTokenRegistry: CancellationTokenRegistry,
private podmanConnection: PodmanConnection,
private configurationRegistry: ConfigurationRegistry,
private modelHandlerRegistry: ModelHandlerRegistry,
) {
this.#models = new Map();
this.#disposables = [];
this.modelHandlerRegistry.getAll().forEach(handler => handler.onUpdate(this.loadLocalModels));
}
async init(): Promise<void> {
const disposable = this.catalogManager.onUpdate(() => {
this.loadLocalModels().catch((err: unknown) => {
console.error(`Something went wrong when loading local models`, err);
});
});
this.#disposables.push(disposable);
try {
await this.loadLocalModels();
} catch (err: unknown) {
console.error('Something went wrong while trying to load local models', err);
}
}
dispose(): void {
this.#models.clear();
this.#disposables.forEach(d => d.dispose());
}
async loadLocalModels(): Promise<void> {
this.#models.clear();
this.catalogManager.getModels().forEach(m => this.#models.set(m.id, m));
const reloadLocalModels = async (): Promise<void> => {
await this.getLocalModelsFromDisk();
await this.sendModelsInfo();
};
// Initialize the local models manually
await reloadLocalModels();
}
getModelsInfo(): ModelInfo[] {
return [...this.#models.values()];
}
async sendModelsInfo(): Promise<void> {
const models = this.getModelsInfo();
await this.rpcExtension.fire(MSG_NEW_MODELS_STATE, models);
}
async getLocalModelsFromDisk(): Promise<void> {
return Promise.all(this.modelHandlerRegistry.getAll().map(registry => registry.getLocalModelsFromDisk())).then(
() => void 0,
);
}
isModelOnDisk(modelId: string): boolean {
return this.#models.get(modelId)?.file !== undefined;
}
getLocalModelInfo(modelId: string): LocalModelInfo {
const model = this.#models.get(modelId);
if (!model?.file) {
throw new Error('model is not on disk');
}
return model.file;
}
getModelInfo(modelId: string): ModelInfo {
const model = this.#models.get(modelId);
if (!model) {
throw new Error('model is not loaded');
}
return model;
}
getLocalModelPath(modelId: string): string {
return getLocalModelFile(this.getModelInfo(modelId));
}
async deleteModel(modelId: string): Promise<void> {
const model = this.#models.get(modelId);
if (!model?.file) {
throw new Error('model cannot be found.');
}
model.state = 'deleting';
await this.sendModelsInfo();
try {
await this.deleteRemoteModel(model);
// if model does not have any url, it has been imported locally by the user
if (!model.url) {
const modelPath = path.join(model.file.path, model.file.file);
// remove it from the catalog as it cannot be downloaded anymore
await this.catalogManager.removeUserModel(modelId);
await fs.promises.rm(modelPath, { recursive: true, force: true, maxRetries: 3 });
} else {
const modelHandler = this.modelHandlerRegistry.findModelHandler(model.url);
if (!modelHandler) {
throw new Error(`no model registry found for model ${model.id} url ${model.url}`);
}
await modelHandler.deleteModel(model);
}
this.telemetry.logUsage('model.delete', { 'model.id': getHash(modelId) });
model.file = model.state = undefined;
} catch (err: unknown) {
this.telemetry.logError('model.delete', {
'model.id': modelId,
message: 'error deleting model from disk',
error: err,
});
await podmanDesktopApi.window.showErrorMessage(`Error deleting model ${modelId}. ${String(err)}`);
// Let's reload the models manually to avoid any issue
model.state = undefined;
await this.getLocalModelsFromDisk();
} finally {
await this.sendModelsInfo();
}
}
private async deleteRemoteModel(modelInfo: ModelInfo): Promise<void> {
// currently only Windows is supported
if (!env.isWindows) {
return;
}
// get all container provider connections
const connections = this.podmanConnection.getContainerProviderConnections();
// iterate over all connections
for (const connection of connections) {
// ignore non-wsl machines
if (connection.vmType !== VMType.WSL) continue;
// Get the corresponding machine name
const machineName = getPodmanMachineName(connection);
// check if model already loaded on the podman machine
const existsRemote = await isModelUploaded(machineName, modelInfo);
if (!existsRemote) return;
await deleteRemoteModel(machineName, modelInfo);
}
}
/**
* This method resolves when the provided model has been downloaded.
*
* It can be called multiple times for the same model: it reuses the existing downloader and waits for
* its completion.
* @param model
* @param labels
*/
async requestDownloadModel(model: ModelInfo, labels?: { [key: string]: string }): Promise<string> {
// Create a task to follow progress
const task: Task = this.createDownloadTask(model, labels);
// Check there is no existing downloader running
const existingDownloader = this.#downloaders.get(model.id);
if (!existingDownloader) {
return this.downloadModel(model, task);
}
if (existingDownloader.completed) {
task.state = 'success';
this.taskRegistry.updateTask(task);
return existingDownloader.getTarget();
}
// Propagate cancellation token from existing task to the new one
task.cancellationToken = this.taskRegistry.findTaskByLabels({ 'model-pulling': model.id })?.cancellationToken;
this.taskRegistry.updateTask(task);
// If we have an existing downloader running we subscribe on its events
return new Promise<string>((resolve, reject) => {
const disposable = existingDownloader.onEvent(event => {
if (!isCompletionEvent(event)) return;
switch (event.status) {
case 'completed':
resolve(existingDownloader.getTarget());
break;
default:
reject(new Error(event.message));
}
disposable.dispose();
});
});
}
private async onDownloadUploadEvent(event: BaseEvent, action: 'download' | 'upload'): Promise<void> {
let taskLabel = 'model-pulling';
let eventName = 'model.download';
if (action === 'upload') {
taskLabel = 'model-uploading';
eventName = 'model.upload';
}
// Always use the task registry as source of truth for tasks
const tasks = this.taskRegistry.getTasksByLabels({ [taskLabel]: event.id });
if (tasks.length === 0) {
// tasks might have been cleared; this is still an error.
console.error(`received ${action} event but no task is associated.`);
return;
}
for (const task of tasks) {
if (isProgressEvent(event)) {
task.state = 'loading';
task.progress = event.value;
} else if (isCompletionEvent(event)) {
// status error or canceled
if (event.status === 'error' || event.status === 'canceled') {
task.state = 'error';
task.progress = undefined;
task.error = event.message;
// telemetry usage
this.telemetry.logError(eventName, {
'model.id': event.id,
message: `error ${action}ing model`,
error: event.message,
durationSeconds: event.duration,
});
} else {
task.state = 'success';
task.progress = 100;
// telemetry usage
this.telemetry.logUsage(eventName, { 'model.id': event.id, durationSeconds: event.duration });
}
// cleanup downloader
this.#downloaders.delete(event.id);
}
this.taskRegistry.updateTask(task); // update task
}
}
public createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader {
if (!model.url) {
throw new Error(`model ${model.id} does not have url defined.`);
}
const modelHandler = this.modelHandlerRegistry.findModelHandler(model.url);
if (!modelHandler) {
throw new Error(`no model registry found for model ${model.id} url ${model.url}`);
}
// Create a downloader
const downloader = modelHandler.createDownloader(model, abortSignal);
this.#downloaders.set(model.id, downloader);
return downloader;
}
private createDownloadTask(model: ModelInfo, labels?: { [key: string]: string }): Task {
// the taskRegistry may contain old entries for a previously failed download of this model; delete them before starting a new download
const failedPullingTaskIds = this.taskRegistry
.getTasksByLabels({
'model-pulling': model.id,
})
.filter(t => t.state === 'error')
.map(t => t.id);
if (failedPullingTaskIds.length > 0) {
this.taskRegistry.deleteAll(failedPullingTaskIds);
}
return this.taskRegistry.createTask(`Downloading model ${model.name}`, 'loading', {
...labels,
'model-pulling': model.id,
});
}
private async downloadModel(model: ModelInfo, task: Task): Promise<string> {
// Check if the model is already on disk.
if (this.isModelOnDisk(model.id)) {
task.name = `Model ${model.name} already present on disk`;
const modelPath = this.getLocalModelPath(model.id);
if (model.sha256) {
const isValid = await hasValidSha(modelPath, model.sha256);
if (!isValid) {
task.state = 'error';
task.error = `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`;
this.taskRegistry.updateTask(task); // update task
throw new Error(
`Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`,
);
}
}
task.state = 'success';
this.taskRegistry.updateTask(task); // update task
// return model path
return modelPath;
}
const abortController = new AbortController();
task.cancellationToken = this.cancellationTokenRegistry.createCancellationTokenSource(() => {
abortController.abort('Cancel');
});
// update task to loading state
this.taskRegistry.updateTask(task);
const downloader = this.createDownloader(model, abortController.signal);
// Capture downloader events
downloader.onEvent(event => this.onDownloadUploadEvent(event, 'download'), this);
// perform download
await downloader.perform(model.id);
await this.updateModelInfos();
return downloader.getTarget();
}
async uploadModelToPodmanMachine(
connection: ContainerProviderConnection,
model: ModelInfo,
labels?: { [key: string]: string },
): Promise<string> {
// ensure the model upload is not disabled
if (this.configurationRegistry.getExtensionConfiguration().modelUploadDisabled) {
console.warn('The model upload is disabled, this may cause the inference server to take a few minutes to start.');
return getLocalModelFile(model);
}
this.taskRegistry.createTask(`Copying model ${model.name} to ${connection.name}`, 'loading', {
...labels,
'model-uploading': model.id,
connection: connection.name,
});
const uploader = new Uploader(connection, model);
uploader.onEvent(event => this.onDownloadUploadEvent(event, 'upload'), this);
// perform upload
const path = uploader.perform(model.id);
await this.updateModelInfos();
return path;
}
private async updateModelInfos(): Promise<void> {
// refresh model lists on event completion
await this.getLocalModelsFromDisk();
this.sendModelsInfo().catch((err: unknown) => {
console.error('Something went wrong while sending models info.', err);
});
}
async getModelMetadata(modelId: string): Promise<Record<string, unknown>> {
const model = this.#models.get(modelId);
if (!model) throw new Error(`model with id ${modelId} does not exist.`);
const before = performance.now();
const data: Record<string, unknown> = {
'model-id': getHash(modelId),
};
try {
let result: GGUFParseOutput<{ strict: false }>;
if (this.isModelOnDisk(modelId)) {
const modelPath = path.normalize(getLocalModelFile(model));
result = await gguf(modelPath, { allowLocalFile: true });
} else if (model.url) {
result = await gguf(model.url);
} else {
throw new Error('cannot get model metadata');
}
return result.metadata;
} catch (err: unknown) {
data['error'] = err;
console.error(err);
throw err;
} finally {
data['duration'] = performance.now() - before;
this.telemetry.logUsage('get-metadata', data);
}
}
}
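// A minimal sketch (not part of the upstream sources) of the metadata lookup that
// getModelMetadata() performs above, assuming the same `gguf` parser already imported by this
// file: it accepts either a local file path (with allowLocalFile) or a remote URL and resolves
// with the parsed GGUF key/value metadata. `readGgufMetadata` is a hypothetical helper name
// used only for illustration.
export async function readGgufMetadata(localPathOrUrl: string, isLocal: boolean): Promise<Record<string, unknown>> {
  // Local files need the allowLocalFile option; URLs are fetched directly.
  const result = isLocal ? await gguf(localPathOrUrl, { allowLocalFile: true }) : await gguf(localPathOrUrl);
  return result.metadata;
}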
================================================
FILE: packages/backend/src/managers/monitoringManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, afterEach, test, vi } from 'vitest';
import { MonitoringManager } from './monitoringManager';
import { containerEngine, type ContainerStatsInfo, type Disposable } from '@podman-desktop/api';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_MONITORING_UPDATE } from '@shared/Messages';
vi.mock('@podman-desktop/api', async () => {
return {
containerEngine: {
statsContainer: vi.fn(),
},
};
});
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
vi.mocked(containerEngine.statsContainer).mockResolvedValue({} as unknown as Disposable);
vi.useFakeTimers();
});
afterEach(() => {
vi.useRealTimers();
});
function simplifiedCallback(callback: (arg: ContainerStatsInfo) => void, cpu: number, ram: number): void {
callback({
cpu_stats: {
cpu_usage: {
total_usage: cpu,
},
},
memory_stats: {
usage: ram,
},
} as unknown as ContainerStatsInfo);
}
test('expect constructor to do nothing', () => {
const manager = new MonitoringManager(rpcExtensionMock);
expect(containerEngine.statsContainer).not.toHaveBeenCalled();
expect(manager.getStats().length).toBe(0);
expect(rpcExtensionMock.fire).not.toHaveBeenCalled();
});
test('expect monitor method to start stats container', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
await manager.monitor('randomContainerId', 'dummyEngineId');
expect(containerEngine.statsContainer).toHaveBeenCalledWith('dummyEngineId', 'randomContainerId', expect.anything());
});
test('expect dispose to dispose stats container', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
const fakeDisposable = vi.fn();
vi.mocked(containerEngine.statsContainer).mockResolvedValue({
dispose: fakeDisposable,
});
await manager.monitor('randomContainerId', 'dummyEngineId');
manager.dispose();
expect(fakeDisposable).toHaveBeenCalled();
});
test('expect webview to be notified when statsContainer call back', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
let mCallback: ((stats: ContainerStatsInfo) => void) | undefined;
vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => {
mCallback = callback;
return { dispose: (): void => {} };
});
await manager.monitor('randomContainerId', 'dummyEngineId');
await vi.waitFor(() => {
expect(mCallback).toBeDefined();
});
if (!mCallback) throw new Error('undefined mCallback');
const date = new Date(2000, 1, 1, 13);
vi.setSystemTime(date);
simplifiedCallback(mCallback, 123, 99);
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_MONITORING_UPDATE, [
{
containerId: 'randomContainerId',
stats: [
{
timestamp: Date.now(),
cpu_usage: 123,
memory_usage: 99,
},
],
},
]);
});
test('expect stats to cumulate', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
let mCallback: ((stats: ContainerStatsInfo) => void) | undefined;
vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => {
mCallback = callback;
return { dispose: (): void => {} };
});
await manager.monitor('randomContainerId', 'dummyEngineId');
await vi.waitFor(() => {
expect(mCallback).toBeDefined();
});
if (!mCallback) throw new Error('undefined mCallback');
simplifiedCallback(mCallback, 0, 0);
simplifiedCallback(mCallback, 1, 1);
simplifiedCallback(mCallback, 2, 2);
simplifiedCallback(mCallback, 3, 3);
const stats = manager.getStats();
expect(stats.length).toBe(1);
expect(stats[0].stats.length).toBe(4);
});
test('expect old stats to be removed', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
let mCallback: ((stats: ContainerStatsInfo) => void) | undefined;
vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => {
mCallback = callback;
return { dispose: (): void => {} };
});
await manager.monitor('randomContainerId', 'dummyEngineId');
await vi.waitFor(() => {
expect(mCallback).toBeDefined();
});
if (!mCallback) throw new Error('undefined mCallback');
vi.setSystemTime(new Date(2000, 1, 1, 13));
simplifiedCallback(mCallback, 0, 0);
vi.setSystemTime(new Date(2005, 1, 1, 13));
simplifiedCallback(mCallback, 1, 1);
simplifiedCallback(mCallback, 2, 2);
simplifiedCallback(mCallback, 3, 3);
const stats = manager.getStats();
expect(stats.length).toBe(1);
expect(stats[0].stats.length).toBe(3);
});
test('expect stats to be disposed if stats result is an error', async () => {
const manager = new MonitoringManager(rpcExtensionMock);
let mCallback: ((stats: ContainerStatsInfo) => void) | undefined;
const fakeDisposable = vi.fn();
vi.mocked(containerEngine.statsContainer).mockImplementation(async (_engineId, _id, callback) => {
mCallback = callback;
return { dispose: fakeDisposable };
});
await manager.monitor('randomContainerId', 'dummyEngineId');
await vi.waitFor(() => {
expect(mCallback).toBeDefined();
});
if (!mCallback) throw new Error('undefined mCallback');
mCallback({ cause: 'container is stopped' } as unknown as ContainerStatsInfo);
const stats = manager.getStats();
expect(stats.length).toBe(0);
expect(fakeDisposable).toHaveBeenCalled();
});
================================================
FILE: packages/backend/src/managers/monitoringManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { type Disposable, containerEngine, type ContainerStatsInfo } from '@podman-desktop/api';
import { Publisher } from '../utils/Publisher';
import { MSG_MONITORING_UPDATE } from '@shared/Messages';
import type { RpcExtension } from '@shared/messages/MessageProxy';
export interface StatsInfo {
timestamp: number;
cpu_usage: number;
memory_usage: number;
}
export interface StatsHistory {
containerId: string;
stats: StatsInfo[];
}
export const MAX_AGE: number = 5 * 60 * 1000; // 5 minutes
export class MonitoringManager extends Publisher<StatsHistory[]> implements Disposable {
#containerStats: Map<string, StatsHistory>;
#disposables: Disposable[];
constructor(rpcExtension: RpcExtension) {
super(rpcExtension, MSG_MONITORING_UPDATE, () => this.getStats());
this.#containerStats = new Map();
this.#disposables = [];
}
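/**
 * Subscribe to container stats through containerEngine.statsContainer. Each callback pushes a
 * StatsInfo entry into the per-container history; the subscription disposes itself when the
 * engine reports an error (e.g. the container stopped) and is also disposed with this manager.
 */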
async monitor(containerId: string, engineId: string): Promise<Disposable> {
const disposable = await containerEngine.statsContainer(engineId, containerId, statsInfo => {
if ('cause' in statsInfo) {
console.error('Cannot stats container', statsInfo.cause);
disposable.dispose();
} else {
this.push(containerId, statsInfo);
}
});
this.#disposables.push(disposable);
return disposable;
}
private push(containerId: string, statsInfo: ContainerStatsInfo): void {
let stats: StatsInfo[] = [];
const statsHistory = this.#containerStats.get(containerId);
if (statsHistory) {
const limit = Date.now() - MAX_AGE;
stats = statsHistory.stats.filter(stats => stats.timestamp > limit);
}
this.#containerStats.set(containerId, {
containerId: containerId,
stats: [
...stats,
{
timestamp: Date.now(),
cpu_usage: statsInfo.cpu_stats.cpu_usage.total_usage,
memory_usage: statsInfo.memory_stats.usage,
},
],
});
this.notify();
}
clear(containerId: string): void {
this.#containerStats.delete(containerId);
}
getStats(): StatsHistory[] {
return Array.from(this.#containerStats.values());
}
dispose(): void {
this.#disposables.forEach(disposable => disposable.dispose());
}
}
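// A minimal usage sketch (not part of the upstream sources): wire a MonitoringManager to an
// RpcExtension (as the spec file above does with a mock), monitor one container and read back
// the accumulated history, which push() keeps trimmed to MAX_AGE. `demoMonitoring` and the
// container/engine ids are hypothetical names used only for illustration.
export async function demoMonitoring(rpcExtension: RpcExtension): Promise<StatsHistory[]> {
  const monitoring = new MonitoringManager(rpcExtension);
  const subscription = await monitoring.monitor('my-container-id', 'my-engine-id');
  const history = monitoring.getStats();
  subscription.dispose();
  return history;
}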
================================================
FILE: packages/backend/src/managers/playground/McpServerManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import path from 'node:path';
import { type RpcExtension } from '@shared/messages/MessageProxy';
import { type McpClient, type McpServer, McpServerType, type McpSettings } from '@shared/models/McpSettings';
import { McpServerManager } from './McpServerManager';
import { JsonWatcher } from '../../utils/JsonWatcher';
import { toMcpClients } from '../../utils/mcpUtils';
vi.mock('../../utils/JsonWatcher');
vi.mock('../../utils/mcpUtils');
const mockJsonWatcher = {
init: vi.fn(),
dispose: vi.fn(),
onContentUpdated: vi.fn((fn: (mcpSettings: McpSettings) => void) => (update = fn)),
} as unknown as JsonWatcher<McpSettings>;
const rpcExtension = { fire: vi.fn(() => Promise.resolve(true)) } as unknown as RpcExtension;
let update: (mcpSettings: McpSettings) => void;
let appUserDirectory: string;
let mcpServerManager: McpServerManager;
beforeEach(async () => {
vi.resetAllMocks();
vi.mocked(JsonWatcher).mockReturnValue(mockJsonWatcher);
vi.mocked(toMcpClients).mockImplementation(async (...mcpServers) =>
mcpServers.map(s => ({ name: s.name }) as unknown as McpClient),
);
appUserDirectory = path.join('/', 'tmp', 'mcp-server-manager-test-');
mcpServerManager = new McpServerManager(rpcExtension, appUserDirectory);
});
test('provides an empty default value', () => {
expect(mcpServerManager.getMcpSettings()).toEqual({ servers: {} });
});
test('init initializes the watcher', () => {
mcpServerManager.init();
expect(mockJsonWatcher.init).toHaveBeenCalled();
});
test('dispose disposes the watcher', () => {
mcpServerManager.dispose();
expect(mockJsonWatcher.dispose).toHaveBeenCalled();
});
describe('when loading mcp-settings.json', () => {
beforeEach(() => {
const mcpSettings = {
servers: {
'stdio-ok': {
enabled: true,
type: 'stdio',
command: 'npx',
args: ['-y', 'kubernetes-mcp-server'],
},
'sse-ok': {
enabled: true,
type: 'sse',
url: 'https://echo.example.com/sse',
headers: {
foo: 'bar',
},
},
'invalid-type': {
enabled: true,
type: 'invalid',
url: 'https://echo.example.com/sse',
},
},
} as unknown as McpSettings;
update(mcpSettings);
});
test('loads valid servers', () => {
expect(mcpServerManager.getMcpSettings().servers).toEqual(
expect.objectContaining({
'stdio-ok': {
enabled: true,
name: 'stdio-ok',
type: McpServerType.STDIO,
command: 'npx',
args: ['-y', 'kubernetes-mcp-server'],
},
'sse-ok': {
enabled: true,
name: 'sse-ok',
type: McpServerType.SSE,
url: 'https://echo.example.com/sse',
headers: { foo: 'bar' },
},
}),
);
});
test('ignores invalid servers', () => {
expect(mcpServerManager.getMcpSettings().servers['invalid-type']).toBeUndefined();
});
});
test('toMcpClients returns the enabled servers', async () => {
mcpServerManager.init();
update({
servers: {
enabled: { enabled: true, type: McpServerType.STDIO } as unknown as McpServer,
disabled: { enabled: false, type: McpServerType.STDIO } as unknown as McpServer,
},
});
const mcpClients = await mcpServerManager.toMcpClients();
expect(mcpClients).toEqual([{ name: 'enabled' }]);
});
================================================
FILE: packages/backend/src/managers/playground/McpServerManager.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import path from 'node:path';
import { type Disposable } from '@podman-desktop/api';
import { MSG_MCP_SERVERS_UPDATE } from '@shared/Messages';
import { type McpSettings, McpServerType, type McpClient } from '@shared/models/McpSettings';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { JsonWatcher } from '../../utils/JsonWatcher';
import { Publisher } from '../../utils/Publisher';
import { toMcpClients } from '../../utils/mcpUtils';
// TODO: Agree on the name of the file and its location
const MCP_SETTINGS = 'mcp-settings.json';
export class McpServerManager extends Publisher<McpSettings> implements Disposable {
private readonly settingsFile: string;
private mcpSettings: McpSettings;
readonly #jsonWatcher: JsonWatcher<McpSettings>;
constructor(
rpcExtension: RpcExtension,
private appUserDirectory: string,
) {
super(rpcExtension, MSG_MCP_SERVERS_UPDATE, () => this.getMcpSettings());
this.settingsFile = path.join(this.appUserDirectory, MCP_SETTINGS);
this.mcpSettings = {
servers: {},
};
this.#jsonWatcher = new JsonWatcher<McpSettings>(this.settingsFile, { ...this.mcpSettings });
this.#jsonWatcher.onContentUpdated(this.onMcpSettingsUpdated.bind(this));
}
/**
* Lazily initialize the MCP server manager dependencies.
*/
init(): void {
this.#jsonWatcher.init();
}
private onMcpSettingsUpdated(mcpSettings: McpSettings): void {
this.mcpSettings = { servers: {} };
for (const [name, mcpServer] of Object.entries(mcpSettings.servers ?? {})) {
mcpServer.name = name;
if (!Object.values(McpServerType).includes(mcpServer.type)) {
console.warn(`McpServerManager: Invalid MCP server type ${mcpServer.type} for server ${mcpServer.name}.`);
continue;
}
this.mcpSettings.servers[name] = mcpServer;
}
this.notify();
}
getMcpSettings(): McpSettings {
return this.mcpSettings;
}
async toMcpClients(): Promise<McpClient[]> {
const enabledServers = Object.values(this.mcpSettings.servers).filter(server => server.enabled);
return toMcpClients(...enabledServers);
}
dispose(): void {
this.#jsonWatcher.dispose();
}
}
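// For reference, a sketch of the mcp-settings.json shape this manager watches, drawn from the
// spec file above (the server names are illustrative, not upstream documentation):
//
// {
//   "servers": {
//     "kubernetes": { "enabled": true, "type": "stdio", "command": "npx", "args": ["-y", "kubernetes-mcp-server"] },
//     "remote-sse": { "enabled": true, "type": "sse", "url": "https://echo.example.com/sse", "headers": { "foo": "bar" } }
//   }
// }
//
// Entries whose "type" is not a known McpServerType are dropped by onMcpSettingsUpdated.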
================================================
FILE: packages/backend/src/managers/playground/aiSdk.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { describe, test, expect, beforeEach, vi } from 'vitest';
import * as ai from 'ai';
import { MockLanguageModelV3 } from 'ai/test';
import { AiStreamProcessor, toCoreMessage } from './aiSdk';
import type {
AssistantChat,
ChatMessage,
Conversation,
ErrorMessage,
Message,
PendingChat,
UserChat,
} from '@shared/models/IPlaygroundMessage';
import type {
LanguageModelV3,
LanguageModelV2CallWarning,
LanguageModelV3StreamPart,
LanguageModelV3GenerateResult,
} from '@ai-sdk/provider';
import { ConversationRegistry } from '../../registries/ConversationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import type { ModelOptions } from '@shared/models/IModelOptions';
import type { ToolSet } from 'ai';
import { jsonSchema, simulateStreamingMiddleware, tool, wrapLanguageModel } from 'ai';
vi.mock('ai', async original => {
const mod = (await original()) as object;
return { ...mod };
});
/* eslint-disable sonarjs/no-nested-functions */
describe('aiSdk', () => {
beforeEach(() => {
vi.resetAllMocks();
});
describe('toCoreMessage', () => {
test('with no fields', () => {
const result = toCoreMessage({} as Message);
expect(result).toEqual([]);
});
test('with no role', () => {
const result = toCoreMessage({ content: 'alex' } as ChatMessage);
expect(result).toEqual([]);
});
test('with no content', () => {
const result = toCoreMessage({ role: 'user' } as ChatMessage);
expect(result).toEqual([{ role: 'user', content: '' }]);
});
test('with all fields', () => {
const result = toCoreMessage({ role: 'user', content: 'alex' } as ChatMessage);
expect(result).toEqual([{ role: 'user', content: 'alex' }]);
});
test('with multiple messages', () => {
const result = toCoreMessage(
{ role: 'user', content: 'alex' } as ChatMessage,
{ role: 'assistant', content: 'bob' } as ChatMessage,
);
expect(result).toEqual([
{ role: 'user', content: 'alex' },
{ role: 'assistant', content: 'bob' },
]);
});
test('with tool call messages', () => {
const result = toCoreMessage(
{ role: 'user', content: 'alex' } as ChatMessage,
{
role: 'assistant',
content: {
type: 'tool-call',
toolCallId: 'call-001',
toolName: 'tool-1',
args: {},
result: {
content: [{ type: 'text', text: 'Success!!!' }],
},
},
} as AssistantChat,
{ role: 'assistant', content: 'The call to the tool was a success!' } as AssistantChat,
);
expect(result).toEqual([
{ role: 'user', content: 'alex' },
{
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: 'call-001',
toolName: 'tool-1',
input: {},
},
],
},
{
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId: 'call-001',
toolName: 'tool-1',
output: {
content: [{ type: 'text', text: 'Success!!!' }],
},
},
],
},
{ role: 'assistant', content: 'The call to the tool was a success!' },
]);
});
});
describe('AiStreamProcessor', () => {
let conversationRegistry: ConversationRegistry;
let conversationId: string;
beforeEach(() => {
const rpcExtension = {
fire: vi.fn().mockResolvedValue(true),
} as unknown as RpcExtension;
conversationRegistry = new ConversationRegistry(rpcExtension);
conversationId = conversationRegistry.createConversation('test-conversation', 'test-model');
conversationRegistry.submit(conversationId, {
content: 'Aitana, please proceed with the test',
role: 'user',
id: conversationRegistry.getUniqueId(),
timestamp: Date.now(),
} as UserChat);
});
test('sends model options', async () => {
const streamTextSpy = vi.spyOn(ai, 'streamText');
const streamProcessor = new AiStreamProcessor(conversationId, conversationRegistry);
const streamResult = streamProcessor.stream(createTestModel(), undefined, {
temperature: 42,
top_p: 13,
max_tokens: 37,
stream_options: { include_usage: true },
} as ModelOptions);
await streamResult.consumeStream();
expect(streamTextSpy).toHaveBeenCalledWith(
expect.objectContaining({
model: expect.anything(),
temperature: 42,
maxOutputTokens: 37,
topP: 13,
abortSignal: expect.any(AbortSignal),
messages: expect.any(Array),
onStepFinish: expect.any(Function),
onError: expect.any(Function),
onChunk: expect.any(Function),
}),
);
});
test('abort, completes the last assistant message', async () => {
const incompleteMessageId = 'incomplete-message-id';
conversationRegistry.submit(conversationId, {
id: incompleteMessageId,
role: 'assistant',
timestamp: Date.now(),
choices: [],
completed: undefined,
} as PendingChat);
const streamProcessor = new AiStreamProcessor(conversationId, conversationRegistry);
streamProcessor['currentMessageId'] = incompleteMessageId;
streamProcessor.abortController.abort('cancel');
expect(conversationRegistry.get(conversationId).messages).toHaveLength(2);
expect((conversationRegistry.get(conversationId).messages[1] as AssistantChat).completed).not.toBeUndefined();
});
describe('with stream error', () => {
beforeEach(async () => {
// eslint-disable-next-line sonarjs/no-nested-functions
const doStream: LanguageModelV3['doStream'] = async () => {
throw new Error('The stream is kaput.');
};
const model = new MockLanguageModelV3({ doStream });
await new AiStreamProcessor(conversationId, conversationRegistry).stream(model).consumeStream();
});
test('appends a single message', () => {
expect(conversationRegistry.get(conversationId).messages).toHaveLength(2);
});
test('appended message is error', () => {
expect((conversationRegistry.get(conversationId).messages[1] as ErrorMessage).error).toEqual(
'The stream is kaput.',
);
});
});
describe('with single message stream', () => {
let model: LanguageModelV3;
beforeEach(async () => {
model = createTestModel({
stream: ai.simulateReadableStream({
chunks: [
{
type: 'response-metadata',
id: 'id-0',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
{ type: 'text-delta', id: 'id-1', delta: 'Greetings' },
{ type: 'text-delta', id: 'id-2', delta: ' professor ' },
{ type: 'text-delta', id: 'id-3', delta: `Falken` },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
usage: {
outputTokens: { total: 133, text: undefined, reasoning: undefined },
inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
totalTokens: 140,
},
},
],
}),
});
await new AiStreamProcessor(conversationId, conversationRegistry).stream(model).consumeStream();
});
test('appends a single message', () => {
expect(conversationRegistry.get(conversationId).messages).toHaveLength(2);
});
test('appended message is from assistant', () => {
expect((conversationRegistry.get(conversationId).messages[1] as ChatMessage).role).toEqual('assistant');
});
test('concatenates message content', () => {
expect((conversationRegistry.get(conversationId).messages[1] as ChatMessage).content).toEqual(
'Greetings professor Falken',
);
});
test('setsUsage', async () => {
const conversation = conversationRegistry.get(conversationId) as Conversation;
expect(conversation?.usage?.completion_tokens).toEqual(133);
expect(conversation?.usage?.prompt_tokens).toEqual(7);
});
});
describe('with wrapped generated multiple messages as stream', () => {
let model: LanguageModelV3;
let tools: ToolSet;
let generateStep: number;
beforeEach(async () => {
generateStep = 0;
model = wrapLanguageModel({
model: new MockLanguageModelV3({
doGenerate: async (): Promise<LanguageModelV3GenerateResult> => {
if (generateStep++ === 0) {
return {
content: [
{
type: 'tool-call',
toolCallId: 'call-001',
toolName: 'tool-1',
input: '{}',
},
{
type: 'tool-call',
toolCallId: 'call-002',
toolName: 'tool-1',
input: '{}',
},
],
finishReason: { unified: 'tool-calls', raw: undefined },
usage: {
inputTokens: { total: 1, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
outputTokens: { total: 1, text: undefined, reasoning: undefined },
},
warnings: [],
};
}
return {
content: [
{
type: 'text',
text: 'These are the results of your functions: huge success!',
},
],
finishReason: { unified: 'stop', raw: undefined },
usage: {
inputTokens: { total: 133, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
outputTokens: { total: 7, text: undefined, reasoning: undefined },
},
warnings: [],
};
},
}),
middleware: simulateStreamingMiddleware(),
});
tools = {
'tool-1': tool({
inputSchema: jsonSchema({ type: 'object' }),
execute: async () => 'successful result!',
}),
};
await new AiStreamProcessor(conversationId, conversationRegistry).stream(model, tools).consumeStream();
});
test('appends multiple messages', () => {
expect(conversationRegistry.get(conversationId).messages).toHaveLength(4);
});
test.each<{ index: number; toolCallId: string }>([
{ index: 1, toolCallId: 'call-001' },
{ index: 2, toolCallId: 'call-002' },
])(`appends tool call (to tool-1) message at $index`, ({ index, toolCallId }) => {
const message = conversationRegistry.get(conversationId).messages[index] as AssistantChat;
expect(message.role).toEqual('assistant');
expect(message.content).toMatchObject({
type: 'tool-call',
toolCallId,
toolName: 'tool-1',
args: {},
});
});
test.each<{ index: number; id: string; toolCallId: string }>([
{ index: 1, id: '3', toolCallId: 'call-001' },
{ index: 2, id: '4', toolCallId: 'call-002' },
])(`sets tool result message at $index for $toolCallId`, ({ index, id, toolCallId }) => {
const message = conversationRegistry.get(conversationId).messages[index] as AssistantChat;
expect(message.id).toEqual(id);
expect(message.timestamp).toBeDefined();
expect(message.role).toEqual('assistant');
expect(message.content).toMatchObject({
type: 'tool-call',
toolCallId,
toolName: 'tool-1',
args: {},
});
if (message.content && typeof message.content === 'object' && 'result' in message.content) {
expect(message.content.result).toEqual('successful result!');
expect(message.completed).toBeDefined();
}
});
test('appends final assistant message', () => {
const message = conversationRegistry.get(conversationId).messages[3] as AssistantChat;
expect(message.role).toEqual('assistant');
expect(message.content).toEqual('These are the results of your functions: huge success!');
});
test('setsUsage', async () => {
const conversation = conversationRegistry.get(conversationId) as Conversation;
expect(conversation?.usage?.completion_tokens).toEqual(7);
expect(conversation?.usage?.prompt_tokens).toEqual(133);
});
});
});
});
export function createTestModel({
stream = ai.simulateReadableStream({ chunks: [] }),
rawCall = { rawPrompt: 'prompt', rawSettings: {} },
rawResponse = undefined,
request = undefined,
warnings,
}: {
stream?: ReadableStream<LanguageModelV3StreamPart>;
rawResponse?: { headers: Record<string, string> };
rawCall?: { rawPrompt: string; rawSettings: Record<string, unknown> };
request?: { body: string };
warnings?: LanguageModelV2CallWarning[];
} = {}): LanguageModelV3 {
return new MockLanguageModelV3({
doStream: async () => ({ stream, rawCall, rawResponse, request, warnings }),
});
}
================================================
FILE: packages/backend/src/managers/playground/aiSdk.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { streamText, stepCountIs } from 'ai';
import type {
LanguageModel,
ModelMessage,
StepResult,
StreamTextResult,
StreamTextOnFinishCallback,
TextStreamPart,
ToolCallPart,
ToolResultPart,
ToolSet,
} from 'ai';
import type { ModelOptions } from '@shared/models/IModelOptions';
import {
type AssistantChat,
type ErrorMessage,
isAssistantToolCall,
type Message,
type ModelUsage,
type PendingChat,
type ToolCall,
} from '@shared/models/IPlaygroundMessage';
import { isChatMessage } from '@shared/models/IPlaygroundMessage';
import type { ConversationRegistry } from '../../registries/ConversationRegistry';
export function toCoreMessage(...messages: Message[]): ModelMessage[] {
const ret: ModelMessage[] = [];
for (const message of messages) {
if (isAssistantToolCall(message)) {
const toolCall = message.content as ToolCall;
ret.push({
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: toolCall.args,
} as ToolCallPart,
] as ToolCallPart[],
} as ModelMessage);
if (toolCall.result) {
ret.push({
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
output: toolCall.result,
} as ToolResultPart,
] as ToolResultPart[],
} as ModelMessage);
}
} else if (isChatMessage(message)) {
ret.push({
role: message.role,
content: message.content ?? '',
} as ModelMessage);
}
}
return ret;
}
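// Example of the mapping above (mirroring the spec file): an assistant message whose content is a
// ToolCall with a result expands into two ModelMessages, an assistant 'tool-call' part followed by
// a 'tool' message carrying the matching 'tool-result' output; plain chat messages map one-to-one.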
export class AiStreamProcessor<TOOLS extends ToolSet = ToolSet> {
private stepStartTime: number | undefined;
private currentMessageId: string | undefined;
public readonly abortController: AbortController;
constructor(
private conversationId: string,
private conversationRegistry: ConversationRegistry,
) {
this.abortController = new AbortController();
this.abortController.signal.addEventListener('abort', this.onAbort);
}
private onStepFinish = (stepResult: StepResult<TOOLS>): void => {
this.conversationRegistry.setUsage(this.conversationId, {
completion_tokens: stepResult.usage.outputTokens,
prompt_tokens: stepResult.usage.inputTokens,
} as ModelUsage);
if (this.currentMessageId !== undefined) {
this.conversationRegistry.completeMessage(this.conversationId, this.currentMessageId);
}
if (stepResult.toolCalls?.length > 0) {
for (const toolCall of stepResult.toolCalls) {
this.conversationRegistry.submit(this.conversationId, {
id: this.conversationRegistry.getUniqueId(),
role: 'assistant',
timestamp: this.stepStartTime,
content: {
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
args: toolCall.input,
} as ToolCall,
} as AssistantChat);
}
}
if (stepResult.toolResults?.length > 0) {
for (const toolResult of stepResult.toolResults) {
this.conversationRegistry.toolResult(
this.conversationId,
toolResult.toolCallId,
toolResult.output as string | object,
);
}
}
this.currentMessageId = undefined;
this.stepStartTime = Date.now();
};
private onChunk = ({ chunk }: { chunk: TextStreamPart<TOOLS> }): void => {
if (chunk.type !== 'text-delta') {
return;
}
if (this.currentMessageId === undefined) {
this.currentMessageId = this.conversationRegistry.getUniqueId();
this.conversationRegistry.submit(this.conversationId, {
id: this.currentMessageId,
role: 'assistant',
timestamp: this.stepStartTime,
choices: [],
completed: undefined,
} as PendingChat);
}
this.conversationRegistry.textDelta(this.conversationId, this.currentMessageId, chunk.text);
};
private onError = (error: unknown): void => {
if (error instanceof Object && 'error' in error) {
error = error.error;
}
if (error instanceof Error) {
error = error.message;
}
let errorMessage = String(error);
if (errorMessage.endsWith('Please reduce the length of the messages or completion.')) {
errorMessage += ' Note: You should start a new playground.';
}
console.error('Something went wrong while creating model response', errorMessage);
this.conversationRegistry.submit(this.conversationId, {
id: this.conversationRegistry.getUniqueId(),
timestamp: Date.now(),
error: errorMessage,
} as ErrorMessage);
};
private onAbort = (): void => {
// Ensure the last message is marked as complete to allow the user to resume the conversation
if (this.currentMessageId !== undefined) {
this.conversationRegistry.completeMessage(this.conversationId, this.currentMessageId);
}
};
private onFinish: StreamTextOnFinishCallback<TOOLS> = stepResult => {
this.conversationRegistry.setUsage(this.conversationId, {
completion_tokens: stepResult.usage.outputTokens,
prompt_tokens: stepResult.usage.inputTokens,
} as ModelUsage);
};
stream = (model: LanguageModel, tools?: TOOLS, options?: ModelOptions): StreamTextResult<TOOLS, never> => {
this.stepStartTime = Date.now();
return streamText({
model,
tools,
stopWhen: stepCountIs(10),
temperature: options?.temperature,
maxOutputTokens: (options?.max_tokens ?? -1) < 1 ? undefined : options?.max_tokens,
topP: options?.top_p,
abortSignal: this.abortController.signal,
messages: toCoreMessage(...this.conversationRegistry.get(this.conversationId).messages),
onStepFinish: this.onStepFinish,
onError: this.onError,
onChunk: this.onChunk,
onFinish: this.onFinish,
});
};
}
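// A minimal usage sketch (not part of the upstream sources), mirroring how the spec file drives
// this class: stream a model answer for an existing conversation and wait for the stream to drain.
// `runPlaygroundStream` is a hypothetical helper name used only for illustration.
export async function runPlaygroundStream<TOOLS extends ToolSet>(
  conversationId: string,
  registry: ConversationRegistry,
  model: LanguageModel,
  tools?: TOOLS,
  options?: ModelOptions,
): Promise<void> {
  const processor = new AiStreamProcessor<TOOLS>(conversationId, registry);
  // consumeStream() drains the stream; messages and usage are written to the registry by the callbacks.
  await processor.stream(model, tools, options).consumeStream();
}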
================================================
FILE: packages/backend/src/managers/playgroundV2Manager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { expect, test, vi, beforeEach, afterEach, describe } from 'vitest';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { PlaygroundV2Manager } from './playgroundV2Manager';
import type { TelemetryLogger } from '@podman-desktop/api';
import type { InferenceServer } from '@shared/models/IInference';
import type { InferenceManager } from './inference/inferenceManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { TaskRegistry } from '../registries/TaskRegistry';
import type { Task, TaskState } from '@shared/models/ITask';
import type { ChatMessage, ErrorMessage } from '@shared/models/IPlaygroundMessage';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_CONVERSATIONS_UPDATE } from '@shared/Messages';
import type { LanguageModelV2CallWarning, LanguageModelV3, LanguageModelV3StreamPart } from '@ai-sdk/provider';
import { type McpServerManager } from './playground/McpServerManager';
import { MockLanguageModelV3 } from 'ai/test';
import { simulateReadableStream } from 'ai';
vi.mock('@ai-sdk/openai-compatible', () => ({
createOpenAICompatible: vi.fn(),
}));
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
const inferenceManagerMock = {
get: vi.fn(),
getServers: vi.fn(),
createInferenceServer: vi.fn(),
startInferenceServer: vi.fn(),
} as unknown as InferenceManager;
const taskRegistryMock = {
createTask: vi.fn(),
getTasksByLabels: vi.fn(),
updateTask: vi.fn(),
} as unknown as TaskRegistry;
const telemetryMock = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
const cancellationTokenRegistryMock = {
createCancellationTokenSource: vi.fn(),
delete: vi.fn(),
} as unknown as CancellationTokenRegistry;
let mcpServerManager: McpServerManager;
let createTestModel: (options: {
stream?: ReadableStream<LanguageModelV3StreamPart>;
rawResponse?: { headers: Record<string, string> };
rawCall?: { rawPrompt: string; rawSettings: Record<string, unknown> };
request?: { body: string };
warnings?: LanguageModelV2CallWarning[];
}) => LanguageModelV3;
beforeEach(async () => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
vi.useFakeTimers();
mcpServerManager = {
getMcpSettings: vi.fn(() => {}),
toMcpClients: vi.fn(() => []),
} as unknown as McpServerManager;
createTestModel = (await import('./playground/aiSdk.spec')).createTestModel;
});
afterEach(async () => {
vi.useRealTimers();
});
test('manager should be properly initialized', () => {
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
expect(manager.getConversations().length).toBe(0);
});
test('submit should throw an error if the server is stopped', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
models: [
{
id: 'model1',
},
],
} as unknown as InferenceServer,
]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground('playground 1', { id: 'model1' } as ModelInfo, 'tracking-1');
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'stopped',
models: [
{
id: 'model1',
},
],
} as unknown as InferenceServer,
]);
await expect(manager.submit(manager.getConversations()[0].id, 'dummyUserInput')).rejects.toThrowError(
'Inference server is not running.',
);
});
test('submit should throw an error if the server is unhealthy', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
health: {
Status: 'unhealthy',
},
models: [
{
id: 'model1',
},
],
} as unknown as InferenceServer,
]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground('p1', { id: 'model1' } as ModelInfo, 'tracking-1');
const playgroundId = manager.getConversations()[0].id;
await expect(manager.submit(playgroundId, 'dummyUserInput')).rejects.toThrowError(
'Inference server is not healthy, currently status: unhealthy.',
);
});
test('create playground should create conversation.', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
health: {
Status: 'healthy',
},
models: [
{
id: 'dummyModelId',
file: {
file: 'dummyModelFile',
},
},
],
} as unknown as InferenceServer,
]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
expect(manager.getConversations().length).toBe(0);
await manager.createPlayground('playground 1', { id: 'model-1' } as ModelInfo, 'tracking-1');
const conversations = manager.getConversations();
expect(conversations.length).toBe(1);
});
test('valid submit should create IPlaygroundMessage and notify the webview', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
health: {
Status: 'healthy',
},
models: [
{
id: 'dummyModelId',
file: {
path: '.',
file: 'dummyModelFile',
},
},
],
connection: {
port: 8888,
},
labels: [],
} as unknown as InferenceServer,
]);
// @ts-expect-error - Mock return type for testing
vi.mocked(createOpenAICompatible).mockReturnValue(() =>
createTestModel({
stream: simulateReadableStream({
chunks: [
{ type: 'text-delta', id: 'id-1', delta: 'The message from the model' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
usage: {
outputTokens: { total: 133, text: undefined, reasoning: undefined },
inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
},
},
],
}),
}),
);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
const date = new Date(2000, 1, 1, 13);
vi.setSystemTime(date);
const playgrounds = manager.getConversations();
await manager.submit(playgrounds[0].id, 'dummyUserInput');
// Wait for assistant message to be completed
await vi.waitFor(() => {
expect(manager.getConversations()[0].usage?.completion_tokens).toBeGreaterThan(0);
});
const conversations = manager.getConversations();
expect(conversations.length).toBe(1);
expect(conversations[0].messages.length).toBe(2);
expect(conversations[0].messages[0]).toStrictEqual({
content: 'dummyUserInput',
id: expect.anything(),
options: undefined,
role: 'user',
timestamp: expect.any(Number),
});
expect(conversations[0].messages[1]).toStrictEqual({
choices: undefined,
completed: expect.any(Number),
content: 'The message from the model',
id: expect.anything(),
role: 'assistant',
timestamp: expect.any(Number),
});
expect(conversations[0].usage).toStrictEqual({
completion_tokens: 133,
prompt_tokens: 7,
});
expect(rpcExtensionMock.fire).toHaveBeenLastCalledWith(MSG_CONVERSATIONS_UPDATE, conversations);
});
test('error', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
health: {
Status: 'healthy',
},
models: [
{
id: 'dummyModelId',
file: {
path: '.',
file: 'dummyModelFile',
},
},
],
connection: {
port: 8888,
},
labels: [],
} as unknown as InferenceServer,
]);
const doStream: LanguageModelV3['doStream'] = async () => {
throw new Error('Please reduce the length of the messages or completion.');
};
vi.mocked(createOpenAICompatible).mockReturnValue(
// @ts-expect-error MockLanguageModelV2 test mock
// eslint-disable-next-line sonarjs/new-operator-misuse
() =>
new (MockLanguageModelV3 as unknown as new (options: {
doStream: LanguageModelV3['doStream'];
}) => LanguageModelV3)({ doStream }),
);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
const date = new Date(2000, 1, 1, 13);
vi.setSystemTime(date);
const playgrounds = manager.getConversations();
await manager.submit(playgrounds[0].id, 'dummyUserInput');
// Wait for error message
await vi.waitFor(() => {
expect((manager.getConversations()[0].messages[1] as ErrorMessage).error).toBeDefined();
});
const conversations = manager.getConversations();
expect(conversations.length).toBe(1);
expect(conversations[0].messages.length).toBe(2);
expect(conversations[0].messages[0]).toStrictEqual({
content: 'dummyUserInput',
id: expect.anything(),
options: undefined,
role: 'user',
timestamp: expect.any(Number),
});
expect(conversations[0].messages[1]).toStrictEqual({
error: 'Please reduce the length of the messages or completion. Note: You should start a new playground.',
id: expect.anything(),
timestamp: expect.any(Number),
});
expect(rpcExtensionMock.fire).toHaveBeenLastCalledWith(MSG_CONVERSATIONS_UPDATE, conversations);
});
test('creating a new playground should send new playground to frontend', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_CONVERSATIONS_UPDATE, [
{
id: expect.anything(),
modelId: 'model-1',
name: 'a name',
messages: [],
usage: {
completion_tokens: 0,
prompt_tokens: 0,
},
},
]);
});
test('creating a new playground with no name should send new playground to frontend with generated name', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_CONVERSATIONS_UPDATE, [
{
id: expect.anything(),
modelId: 'model-1',
name: 'playground 1',
messages: [],
usage: {
completion_tokens: 0,
prompt_tokens: 0,
},
},
]);
});
test('creating a new playground with no model served should start an inference server', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
expect(createInferenceServerMock).toHaveBeenCalledWith({
gpuLayers: expect.any(Number),
image: undefined,
providerId: undefined,
inferenceProvider: undefined,
labels: {
trackingId: 'tracking-1',
},
modelsInfo: [
{
id: 'model-1',
name: 'Model 1',
},
],
port: expect.anything(),
});
});
test('creating a new playground with the model already served should not start an inference server', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
models: [
{
id: 'model-1',
},
],
},
] as InferenceServer[]);
const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
expect(createInferenceServerMock).not.toHaveBeenCalled();
});
test('creating a new playground with the model server stopped should start the inference server', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
models: [
{
id: 'model-1',
},
],
status: 'stopped',
container: {
containerId: 'container-1',
},
},
] as InferenceServer[]);
const createInferenceServerMock = vi.mocked(inferenceManagerMock.createInferenceServer);
const startInferenceServerMock = vi.mocked(inferenceManagerMock.startInferenceServer);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
expect(createInferenceServerMock).not.toHaveBeenCalled();
expect(startInferenceServerMock).toHaveBeenCalledWith('container-1');
});
test('delete conversation should delete the conversation', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
expect(manager.getConversations().length).toBe(0);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
const conversations = manager.getConversations();
expect(conversations.length).toBe(1);
manager.deleteConversation(conversations[0].id);
expect(manager.getConversations().length).toBe(0);
expect(rpcExtensionMock.fire).toHaveBeenCalled();
});
test('creating a new playground with an existing name should fail', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground(
'a name',
{
id: 'model-1',
name: 'Model 1',
} as unknown as ModelInfo,
'tracking-1',
);
await expect(
manager.createPlayground(
'a name',
{
id: 'model-2',
name: 'Model 2',
} as unknown as ModelInfo,
'tracking-2',
),
).rejects.toThrowError('a playground with the name a name already exists');
});
test('requestCreatePlayground should call createPlayground and createTask, then updateTask', async () => {
vi.useRealTimers();
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
const createTaskMock = vi.mocked(taskRegistryMock).createTask;
const updateTaskMock = vi.mocked(taskRegistryMock).updateTask;
createTaskMock.mockImplementation((_name: string, _state: TaskState, labels?: { [id: string]: string }) => {
return {
labels,
} as Task;
});
const createPlaygroundSpy = vi.spyOn(manager, 'createPlayground').mockResolvedValue('playground-1');
const id = await manager.requestCreatePlayground('a name', { id: 'model-1' } as ModelInfo);
expect(createPlaygroundSpy).toHaveBeenCalledWith('a name', { id: 'model-1' } as ModelInfo, expect.any(String));
expect(createTaskMock).toHaveBeenCalledWith('Creating Playground environment', 'loading', {
trackingId: id,
});
await new Promise(resolve => setTimeout(resolve, 0));
expect(updateTaskMock).toHaveBeenCalledWith({
labels: {
trackingId: id,
playgroundId: 'playground-1',
},
state: 'success',
});
});
test('requestCreatePlayground should call createPlayground and createTask, then updateTask when createPlayground fails', async () => {
vi.useRealTimers();
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
const createTaskMock = vi.mocked(taskRegistryMock).createTask;
const updateTaskMock = vi.mocked(taskRegistryMock).updateTask;
const getTasksByLabelsMock = vi.mocked(taskRegistryMock).getTasksByLabels;
createTaskMock.mockImplementation((_name: string, _state: TaskState, labels?: { [id: string]: string }) => {
return {
labels,
} as Task;
});
const createPlaygroundSpy = vi.spyOn(manager, 'createPlayground').mockRejectedValue(new Error('an error'));
const id = await manager.requestCreatePlayground('a name', { id: 'model-1' } as ModelInfo);
expect(createPlaygroundSpy).toHaveBeenCalledWith('a name', { id: 'model-1' } as ModelInfo, expect.any(String));
expect(createTaskMock).toHaveBeenCalledWith('Creating Playground environment', 'loading', {
trackingId: id,
});
getTasksByLabelsMock.mockReturnValue([
{
labels: {
trackingId: id,
},
} as unknown as Task,
]);
await new Promise(resolve => setTimeout(resolve, 0));
expect(updateTaskMock).toHaveBeenCalledWith({
error: 'Something went wrong while trying to create a playground environment Error: an error.',
labels: {
trackingId: id,
},
state: 'error',
});
});
describe('system prompt', () => {
test('set system prompt on non existing conversation should throw an error', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
models: [
{
id: 'model1',
},
],
} as unknown as InferenceServer,
]);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
expect(() => {
manager.setSystemPrompt('invalid', 'content');
}).toThrowError('conversation with id invalid does not exist.');
});
test('set system prompt should throw an error if the user already submitted a message', async () => {
vi.mocked(inferenceManagerMock.getServers).mockReturnValue([
{
status: 'running',
health: {
Status: 'healthy',
},
models: [
{
id: 'dummyModelId',
file: {
path: '.',
file: 'dummyModelFile',
},
},
],
connection: {
port: 8888,
},
labels: [],
} as unknown as InferenceServer,
]);
// @ts-expect-error - Mock return type for testing
vi.mocked(createOpenAICompatible).mockReturnValue(() =>
createTestModel({
stream: simulateReadableStream({
chunks: [
{ type: 'text-delta', id: 'id-1', delta: 'The message from the model' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
usage: {
outputTokens: { total: 133, text: undefined, reasoning: undefined },
inputTokens: { total: 7, noCache: undefined, cacheRead: undefined, cacheWrite: undefined },
},
},
],
}),
}),
);
const manager = new PlaygroundV2Manager(
rpcExtensionMock,
inferenceManagerMock,
taskRegistryMock,
telemetryMock,
cancellationTokenRegistryMock,
mcpServerManager,
);
await manager.createPlayground('playground 1', { id: 'dummyModelId' } as ModelInfo, 'tracking-1');
const date = new Date(2000, 1, 1, 13);
vi.setSystemTime(date);
const conversations = manager.getConversations();
await manager.submit(conversations[0].id, 'dummyUserInput');
// Wait for assistant message to be completed
await vi.waitFor(() => {
expect((manager.getConversations()[0].messages[1] as ChatMessage).content).toBeDefined();
});
expect(() => {
manager.setSystemPrompt(manager.getConversations()[0].id, 'newSystemPrompt');
}).toThrowError('Cannot change system prompt on started conversation.');
});
});
================================================
FILE: packages/backend/src/managers/playgroundV2Manager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { Disposable, TelemetryLogger } from '@podman-desktop/api';
import type { InferenceManager } from './inference/inferenceManager';
import type { ModelOptions } from '@shared/models/IModelOptions';
import { ConversationRegistry } from '../registries/ConversationRegistry';
import type { Conversation, SystemPrompt, UserChat } from '@shared/models/IPlaygroundMessage';
import { isSystemPrompt } from '@shared/models/IPlaygroundMessage';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { withDefaultConfiguration } from '../utils/inferenceUtils';
import { getRandomString } from '../utils/randomUtils';
import type { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import { getHash } from '../utils/sha';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { AiStreamProcessor } from './playground/aiSdk';
import { type McpServerManager } from './playground/McpServerManager';
import type { ToolSet } from 'ai';
import { simulateStreamingMiddleware, wrapLanguageModel } from 'ai';
export class PlaygroundV2Manager implements Disposable {
readonly #conversationRegistry: ConversationRegistry;
constructor(
rpcExtension: RpcExtension,
private inferenceManager: InferenceManager,
private taskRegistry: TaskRegistry,
private telemetry: TelemetryLogger,
private cancellationTokenRegistry: CancellationTokenRegistry,
private mcpServerManager: McpServerManager,
) {
this.#conversationRegistry = new ConversationRegistry(rpcExtension);
}
deleteConversation(conversationId: string): void {
const conversation = this.#conversationRegistry.get(conversationId);
this.telemetry.logUsage('playground.delete', {
totalMessages: conversation.messages.length,
modelId: getHash(conversation.modelId),
});
this.#conversationRegistry.deleteConversation(conversationId);
}
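/**
* Create a task tracking the playground creation and start {@link createPlayground} in the background.
* Errors are reported on the task; the returned tracking id can be used to query the related tasks.
* @param name the playground name, a free name is generated when empty
* @param model the model the playground will use
*/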
async requestCreatePlayground(name: string, model: ModelInfo): Promise<string> {
const trackingId: string = getRandomString();
const task = this.taskRegistry.createTask('Creating Playground environment', 'loading', {
trackingId: trackingId,
});
const telemetry: Record<string, unknown> = {
hasName: !!name,
modelId: getHash(model.id),
};
this.createPlayground(name, model, trackingId)
.then((playgroundId: string) => {
this.taskRegistry.updateTask({
...task,
state: 'success',
labels: {
...task.labels,
playgroundId,
},
});
})
.catch((err: unknown) => {
telemetry['errorMessage'] = `${String(err)}`;
const tasks = this.taskRegistry.getTasksByLabels({
trackingId: trackingId,
});
// Mark all other tasks still in loading state as error
tasks
.filter(t => t.state === 'loading' && t.id !== task.id)
.forEach(t => {
this.taskRegistry.updateTask({
...t,
state: 'error',
});
});
// Update the main task
this.taskRegistry.updateTask({
...task,
state: 'error',
error: `Something went wrong while trying to create a playground environment ${String(err)}.`,
});
})
.finally(() => {
this.telemetry.logUsage('playground.create', telemetry);
});
return trackingId;
}
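/**
* Create a conversation for the given model and ensure an inference server serving it is available,
* creating or starting one when necessary.
* @param name the playground name, a free name is generated when empty
* @param model the model to serve
* @param trackingId the tracking id used to label the inference server tasks
* @returns the id of the created conversation
*/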
async createPlayground(name: string, model: ModelInfo, trackingId: string): Promise<string> {
if (!name) {
name = this.getFreeName();
}
if (!this.isNameFree(name)) {
throw new Error(`a playground with the name ${name} already exists`);
}
// Create conversation
const conversationId = this.#conversationRegistry.createConversation(name, model.id);
// create/start inference server if necessary
const servers = this.inferenceManager.getServers();
const server = servers.find(s => s.models.map(mi => mi.id).includes(model.id));
if (!server) {
await this.inferenceManager.createInferenceServer(
await withDefaultConfiguration({
modelsInfo: [model],
labels: {
trackingId: trackingId,
},
}),
);
} else if (server.status === 'stopped') {
await this.inferenceManager.startInferenceServer(server.container.containerId);
}
return conversationId;
}
/**
* Add a system prompt to an existing conversation.
* @param conversationId the conversation to append the system prompt to.
* @param content the content of the system prompt
*/
private submitSystemPrompt(conversationId: string, content: string): void {
this.#conversationRegistry.submit(conversationId, {
content: content,
role: 'system',
id: this.#conversationRegistry.getUniqueId(),
timestamp: Date.now(),
} as SystemPrompt);
this.telemetry.logUsage('playground.system-prompt.create', {
modelId: getHash(this.#conversationRegistry.get(conversationId).modelId),
});
}
/**
* Given a conversation, update the system prompt.
* If none exists, it will create one, otherwise it will replace the content with the new one
* @param conversationId the conversation id to set the system prompt for
* @param content the new system prompt to use
*/
setSystemPrompt(conversationId: string, content: string | undefined): void {
const conversation = this.#conversationRegistry.get(conversationId);
if (content === undefined || content.length === 0) {
this.#conversationRegistry.removeMessage(conversationId, conversation.messages[0].id);
this.telemetry.logUsage('playground.system-prompt.delete', {
modelId: getHash(conversation.modelId),
});
return;
}
if (conversation.messages.length === 0) {
this.submitSystemPrompt(conversationId, content);
} else if (conversation.messages.length === 1 && isSystemPrompt(conversation.messages[0])) {
this.#conversationRegistry.update(conversationId, conversation.messages[0].id, {
content,
});
this.telemetry.logUsage('playground.system-prompt.update', {
modelId: getHash(conversation.modelId),
});
} else {
throw new Error('Cannot change system prompt on started conversation.');
}
}
/**
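* Submit a user message to the given conversation and stream the model response into the conversation registry.
* The resolved value is the id of the cancellation token that can be used to abort the stream.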
* @param conversationId
* @param userInput the user input
* @param options the model configuration
*/
async submit(conversationId: string, userInput: string, options?: ModelOptions): Promise<number> {
const conversation = this.#conversationRegistry.get(conversationId);
const servers = this.inferenceManager.getServers();
const server = servers.find(s => s.models.map(mi => mi.id).includes(conversation.modelId));
if (server === undefined) throw new Error('Inference server not found.');
if (server.status !== 'running') throw new Error('Inference server is not running.');
if (server.health?.Status !== 'healthy')
throw new Error(`Inference server is not healthy, currently status: ${server.health?.Status ?? 'unknown'}.`);
const modelInfo = server.models.find(model => model.id === conversation.modelId);
if (modelInfo === undefined)
throw new Error(
`modelId '${conversation.modelId}' is not available on the inference server, valid model ids are: ${server.models.map(model => model.id).join(', ')}.`,
);
this.#conversationRegistry.submit(conversation.id, {
content: userInput,
options: options,
role: 'user',
id: this.#conversationRegistry.getUniqueId(),
timestamp: Date.now(),
} as UserChat);
if (!modelInfo.file?.path) throw new Error('model info has undefined file.');
const telemetry: Record<string, unknown> = {
conversationId: conversationId,
...options,
promptLength: userInput.length,
modelId: getHash(modelInfo.id),
};
const streamProcessor = new AiStreamProcessor(conversationId, this.#conversationRegistry);
const cancelTokenId = this.cancellationTokenRegistry.createCancellationTokenSource(() => {
streamProcessor.abortController.abort('cancel');
});
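// Collect the tools exposed by every configured MCP server so they can be offered to the model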
const tools: ToolSet = {};
const mcpClients = await this.mcpServerManager.toMcpClients();
for (const client of mcpClients) {
const clientTools = await client.tools();
for (const entry of Object.entries(clientTools)) {
tools[entry[0]] = entry[1];
}
}
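// Prefer the API endpoint advertised through the server labels, falling back to the local port mapping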
const openAiClient = createOpenAICompatible({
name: modelInfo.name,
baseURL: server.labels['api'] ?? `http://localhost:${server.connection.port}/v1`,
});
let model = openAiClient(modelInfo.name);
// Tool calling in OpenAI doesn't support streaming yet
if (Object.keys(tools).length > 0) {
model = wrapLanguageModel({ model, middleware: simulateStreamingMiddleware() });
}
const start = Date.now();
const finalBlock = (): void => {
this.telemetry.logUsage('playground.submit', telemetry);
this.cancellationTokenRegistry.delete(cancelTokenId);
Promise.all(mcpClients.map(client => client.close())).catch((e: unknown) =>
console.error(`Error closing MCP client`, e),
);
};
streamProcessor
.stream(model, tools, options)
.consumeStream()
.then(
() => {
this.telemetry.logUsage('playground.message.complete', {
duration: Date.now() - start,
modelId: getHash(conversation.modelId),
});
finalBlock();
},
(err: unknown) => {
console.error('Something went wrong while processing stream', err);
finalBlock();
},
);
return cancelTokenId;
}
getConversations(): Conversation[] {
return this.#conversationRegistry.getAll();
}
private getFreeName(): string {
const names = new Set(this.getConversations().map(c => c.name));
let i = 0;
let name: string;
do {
name = `playground ${++i}`;
} while (names.has(name));
return name;
}
private isNameFree(name: string): boolean {
return !this.getConversations().some(c => c.name === name);
}
dispose(): void {
this.#conversationRegistry.dispose();
}
}
================================================
FILE: packages/backend/src/managers/podmanConnection.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import { PodmanConnection } from './podmanConnection';
import type {
ContainerProviderConnection,
Extension,
ProviderConnectionStatus,
ProviderContainerConnection,
ProviderEvent,
RegisterContainerConnectionEvent,
RunResult,
UnregisterContainerConnectionEvent,
UpdateContainerConnectionEvent,
} from '@podman-desktop/api';
import { containerEngine, extensions, process, provider, EventEmitter, env } from '@podman-desktop/api';
import { VMType } from '@shared/models/IPodman';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { getPodmanCli, getPodmanMachineName } from '../utils/podman';
import type { RpcExtension } from '@shared/messages/MessageProxy';
import { MSG_PODMAN_CONNECTION_UPDATE } from '@shared/Messages';
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
vi.mock('@podman-desktop/api', async () => {
return {
EventEmitter: vi.fn(),
provider: {
onDidUnregisterContainerConnection: vi.fn(),
onDidRegisterContainerConnection: vi.fn(),
onDidUpdateContainerConnection: vi.fn(),
onDidUpdateProvider: vi.fn(),
getContainerConnections: vi.fn(),
},
process: {
exec: vi.fn(),
},
extensions: {
getExtension: vi.fn(),
},
containerEngine: {
listInfos: vi.fn(),
},
env: {
isLinux: vi.fn(),
},
navigation: {},
};
});
vi.mock('../utils/podman', () => {
return {
getPodmanCli: vi.fn(),
getPodmanMachineName: vi.fn(),
MIN_CPUS_VALUE: 4,
};
});
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(rpcExtensionMock.fire).mockResolvedValue(true);
vi.mocked(provider.getContainerConnections).mockReturnValue([]);
vi.mocked(getPodmanCli).mockReturnValue('podman-executable');
vi.mocked(getPodmanMachineName).mockImplementation(connection => connection.name);
const listeners: ((value: unknown) => void)[] = [];
vi.mocked(EventEmitter).mockReturnValue({
event: vi.fn().mockImplementation(callback => {
listeners.push(callback);
}),
fire: vi.fn().mockImplementation((content: unknown) => {
listeners.forEach(listener => listener(content));
}),
} as unknown as EventEmitter<unknown>);
});
const providerContainerConnectionMock: ProviderContainerConnection = {
connection: {
type: 'podman',
status: () => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
};
describe('execute', () => {
test('execute should get the podman extension from api', async () => {
vi.mocked(extensions.getExtension).mockReturnValue(undefined);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.execute(providerContainerConnectionMock.connection, ['ls']);
expect(extensions.getExtension).toHaveBeenCalledWith('podman-desktop.podman');
});
test('execute should call getPodmanCli if extension not available', async () => {
vi.mocked(extensions.getExtension).mockReturnValue(undefined);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.execute(providerContainerConnectionMock.connection, ['ls']);
expect(getPodmanCli).toHaveBeenCalledOnce();
expect(process.exec).toHaveBeenCalledWith('podman-executable', ['ls'], undefined);
});
test('options should be propagated to process execution when provided', async () => {
vi.mocked(extensions.getExtension).mockReturnValue(undefined);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.execute(providerContainerConnectionMock.connection, ['ls'], {
isAdmin: true,
});
expect(getPodmanCli).toHaveBeenCalledOnce();
expect(process.exec).toHaveBeenCalledWith('podman-executable', ['ls'], {
isAdmin: true,
});
});
test('execute should use extension exec if available', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]);
const podmanAPI = {
exec: vi.fn(),
};
vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.execute(providerContainerConnectionMock.connection, ['ls']);
expect(getPodmanCli).not.toHaveBeenCalledOnce();
expect(podmanAPI.exec).toHaveBeenCalledWith(['ls'], {
connection: providerContainerConnectionMock,
});
});
test('an error should be thrown if the provided container connection does not exist', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([]);
const podmanAPI = {
exec: vi.fn(),
};
vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension);
const manager = new PodmanConnection(rpcExtensionMock);
await expect(async () => {
await manager.execute(providerContainerConnectionMock.connection, ['ls'], {
isAdmin: true,
});
}).rejects.toThrowError('cannot find podman provider with connection name Podman Machine');
});
test('execute should propagate options to extension exec if available', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]);
const podmanAPI = {
exec: vi.fn(),
};
vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.execute(providerContainerConnectionMock.connection, ['ls'], {
isAdmin: true,
});
expect(getPodmanCli).not.toHaveBeenCalledOnce();
expect(podmanAPI.exec).toHaveBeenCalledWith(['ls'], {
isAdmin: true,
connection: providerContainerConnectionMock,
});
});
});
describe('executeSSH', () => {
test('executeSSH should call getPodmanCli if extension not available', async () => {
vi.mocked(extensions.getExtension).mockReturnValue(undefined);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.executeSSH(providerContainerConnectionMock.connection, ['ls']);
expect(getPodmanCli).toHaveBeenCalledOnce();
expect(process.exec).toHaveBeenCalledWith(
'podman-executable',
['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'],
undefined,
);
});
test('executeSSH should use extension exec if available', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]);
const podmanAPI = {
exec: vi.fn(),
};
vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.executeSSH(providerContainerConnectionMock.connection, ['ls']);
expect(getPodmanCli).not.toHaveBeenCalledOnce();
expect(podmanAPI.exec).toHaveBeenCalledWith(
['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'],
{
connection: providerContainerConnectionMock,
},
);
});
test('executeSSH should propagate options to extension exec if available', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnectionMock]);
const podmanAPI = {
exec: vi.fn(),
};
vi.mocked(extensions.getExtension).mockReturnValue({ exports: podmanAPI } as unknown as Extension);
const manager = new PodmanConnection(rpcExtensionMock);
await manager.executeSSH(providerContainerConnectionMock.connection, ['ls'], {
isAdmin: true,
});
expect(getPodmanCli).not.toHaveBeenCalledOnce();
expect(podmanAPI.exec).toHaveBeenCalledWith(
['machine', 'ssh', providerContainerConnectionMock.connection.name, 'ls'],
{
isAdmin: true,
connection: providerContainerConnectionMock,
},
);
});
});
describe('podman connection initialization', () => {
test('init should notify publisher', () => {
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []);
});
test('init should register all provider events', () => {
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
expect(provider.onDidUnregisterContainerConnection).toHaveBeenCalledWith(expect.any(Function));
expect(provider.onDidRegisterContainerConnection).toHaveBeenCalledWith(expect.any(Function));
expect(provider.onDidUpdateContainerConnection).toHaveBeenCalledWith(expect.any(Function));
expect(provider.onDidUpdateProvider).toHaveBeenCalledWith(expect.any(Function));
});
test('init should fetch all container connections', () => {
const statusMock = vi.fn().mockReturnValue('started');
const providerContainerConnection: ProviderContainerConnection = {
connection: {
type: 'podman',
status: statusMock,
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
};
vi.mocked(provider.getContainerConnections).mockReturnValue([providerContainerConnection]);
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
expect(manager.getContainerProviderConnectionInfo()).toStrictEqual([
{
name: 'Podman Machine',
providerId: 'podman',
status: 'started',
type: 'podman',
vmType: VMType.UNKNOWN,
},
]);
expect(manager.getContainerProviderConnections()).toStrictEqual([providerContainerConnection.connection]);
expect(statusMock).toHaveBeenCalled();
});
});
async function getListeners(): Promise<{
onDidUnregisterContainerConnection: (e: UnregisterContainerConnectionEvent) => void;
onDidRegisterContainerConnection: (e: RegisterContainerConnectionEvent) => void;
onDidUpdateContainerConnection: (e: UpdateContainerConnectionEvent) => void;
onDidUpdateProvider: (e: ProviderEvent) => void;
podmanConnection: PodmanConnection;
}> {
const onDidUnregisterContainerConnectionPromise: Promise<(e: UnregisterContainerConnectionEvent) => void> =
new Promise(resolve => {
vi.mocked(provider.onDidUnregisterContainerConnection).mockImplementation(
(fn: (e: UnregisterContainerConnectionEvent) => void) => {
resolve(fn);
return {
dispose: vi.fn(),
};
},
);
});
const onDidRegisterContainerConnectionPromise: Promise<(e: RegisterContainerConnectionEvent) => void> = new Promise(
resolve => {
vi.mocked(provider.onDidRegisterContainerConnection).mockImplementation(
(fn: (e: RegisterContainerConnectionEvent) => void) => {
resolve(fn);
return {
dispose: vi.fn(),
};
},
);
},
);
const onDidUpdateContainerConnectionPromise: Promise<(e: UpdateContainerConnectionEvent) => void> = new Promise(
resolve => {
vi.mocked(provider.onDidUpdateContainerConnection).mockImplementation(
(fn: (e: UpdateContainerConnectionEvent) => void) => {
resolve(fn);
return {
dispose: vi.fn(),
};
},
);
},
);
const onDidUpdateProviderPromise: Promise<(e: ProviderEvent) => void> = new Promise(resolve => {
vi.mocked(provider.onDidUpdateProvider).mockImplementation((fn: (e: ProviderEvent) => void) => {
resolve(fn);
return {
dispose: vi.fn(),
};
});
});
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
return {
onDidUnregisterContainerConnection: await onDidUnregisterContainerConnectionPromise,
onDidRegisterContainerConnection: await onDidRegisterContainerConnectionPromise,
onDidUpdateContainerConnection: await onDidUpdateContainerConnectionPromise,
onDidUpdateProvider: await onDidUpdateProviderPromise,
podmanConnection: manager,
};
}
describe('container connection event', () => {
test('onDidUnregisterContainerConnection should refresh and notify webview', async () => {
const { onDidUnregisterContainerConnection } = await getListeners();
// simulate onDidUnregisterContainerConnection event
onDidUnregisterContainerConnection({ providerId: 'podman' });
// ensure the webview has been notified
await vi.waitFor(() => {
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []);
});
});
test('onDidUnregisterContainerConnection should fire PodmanConnectionEvent', async () => {
const { onDidUnregisterContainerConnection, podmanConnection } = await getListeners();
// register event listener
const onPodmanConnectionEventListenerMock = vi.fn();
podmanConnection.onPodmanConnectionEvent(onPodmanConnectionEventListenerMock);
// simulate onDidUnregisterContainerConnection event
onDidUnregisterContainerConnection({ providerId: 'podman' });
expect(onPodmanConnectionEventListenerMock).toHaveBeenCalledWith({
status: 'unregister',
});
});
test('onDidRegisterContainerConnection should notify webview', async () => {
const { onDidRegisterContainerConnection, podmanConnection } = await getListeners();
// simulate a onDidRegisterContainerConnection event
onDidRegisterContainerConnection({
providerId: 'podman',
connection: {
type: 'podman',
name: 'Podman Machine',
status: () => 'started',
endpoint: {
socketPath: './socket-path',
},
},
});
// ensure the webview has been notified
await vi.waitFor(() => {
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, [
{
providerId: 'podman',
name: 'Podman Machine',
status: 'started',
type: 'podman',
vmType: VMType.UNKNOWN,
},
]);
});
// ensure it has properly been added
expect(podmanConnection.getContainerProviderConnectionInfo().length).toBe(1);
});
test('onDidRegisterContainerConnection should fire PodmanConnectionEvent', async () => {
const { onDidRegisterContainerConnection, podmanConnection } = await getListeners();
// register event listener
const onPodmanConnectionEventListenerMock = vi.fn();
podmanConnection.onPodmanConnectionEvent(onPodmanConnectionEventListenerMock);
// simulate a onDidRegisterContainerConnection event
onDidRegisterContainerConnection({
providerId: 'podman',
connection: {
type: 'podman',
name: 'Podman Machine',
status: () => 'started',
endpoint: {
socketPath: './socket-path',
},
},
});
expect(onPodmanConnectionEventListenerMock).toHaveBeenCalledWith({
status: 'register',
});
});
test('onDidUpdateProvider should refresh and notify webview', async () => {
const { onDidUpdateProvider } = await getListeners();
// simulate onDidUnregisterContainerConnection event
onDidUpdateProvider({ name: 'podman', status: 'unknown', id: 'podman' });
// ensure the webview has been notified
await vi.waitFor(() => {
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []);
});
});
test('onDidUpdateContainerConnection should refresh and notify webview', async () => {
const { onDidUpdateContainerConnection } = await getListeners();
// simulate onDidUnregisterContainerConnection event
onDidUpdateContainerConnection({
status: 'started',
providerId: 'podman',
connection: {
type: 'podman',
name: 'Podman Machine',
status: () => 'started',
endpoint: {
socketPath: './socket-path',
},
},
});
// ensure the webview has been notified
await vi.waitFor(() => {
expect(rpcExtensionMock.fire).toHaveBeenCalledWith(MSG_PODMAN_CONNECTION_UPDATE, []);
});
});
});
describe('getVMType', () => {
test('empty response should throw an error', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: '[]',
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
await expect(() => manager.getVMType('machine')).rejects.toThrowError(
'podman machine list provided an empty array',
);
});
test('empty array should return UNKNOWN when no name is provided', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: '[]',
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
expect(await manager.getVMType()).toBe(VMType.UNKNOWN);
});
test('malformed response should throw an error', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: '{}',
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
await expect(() => manager.getVMType()).rejects.toThrowError('podman machine list provided a malformed response');
});
test('array with length greater than one requires a name', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: '[{}, {}]',
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
await expect(() => manager.getVMType()).rejects.toThrowError(
'name need to be provided when more than one podman machine is configured.',
);
});
test('argument name should be used to filter the machine', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: JSON.stringify([
{
Name: 'machine-1',
VMType: VMType.QEMU,
},
{
Name: 'machine-2',
VMType: VMType.APPLEHV,
},
]),
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
expect(await manager.getVMType('machine-2')).toBe(VMType.APPLEHV);
});
test('invalid name should throw an error', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: JSON.stringify([
{
Name: 'machine-1',
},
{
Name: 'machine-2',
},
]),
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
await expect(() => manager.getVMType('potatoes')).rejects.toThrowError(
'cannot find matching podman machine with name potatoes',
);
});
test('single machine should return its VMType', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: JSON.stringify([
{
Name: 'machine-1',
VMType: VMType.WSL,
},
]),
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
expect(await manager.getVMType()).toBe(VMType.WSL);
});
test('unknown string should return UNKNOWN', async () => {
vi.mocked(process.exec).mockResolvedValue({
stdout: JSON.stringify([
{
Name: 'machine-1',
VMType: 'fake-content',
},
]),
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
expect(await manager.getVMType()).toBe(VMType.UNKNOWN);
});
test.each(Object.values(VMType) as string[])('%s type should be the expected result', async vmtype => {
vi.mocked(process.exec).mockResolvedValue({
stdout: JSON.stringify([
{
VMType: vmtype,
},
]),
} as unknown as RunResult);
const manager = new PodmanConnection(rpcExtensionMock);
expect(await manager.getVMType()).toBe(vmtype);
});
});
const modelMock: ModelInfo & { memory: number } = {
name: 'dummy',
memory: 10,
description: '',
id: 'dummy-id',
properties: {},
};
describe('checkContainerConnectionStatusAndResources', () => {
test('return native on Linux', async () => {
const manager = new PodmanConnection(rpcExtensionMock);
vi.mocked(env).isLinux = true;
const result = await manager.checkContainerConnectionStatusAndResources({
model: modelMock,
context: 'inference',
});
expect(result).toStrictEqual({
status: 'native',
canRedirect: expect.any(Boolean),
});
});
test('return noMachineInfo if there is no running podman connection', async () => {
const manager = new PodmanConnection(rpcExtensionMock);
vi.mocked(env).isLinux = false;
const result = await manager.checkContainerConnectionStatusAndResources({
model: modelMock,
context: 'inference',
});
expect(result).toStrictEqual({
status: 'no-machine',
canRedirect: expect.any(Boolean),
});
});
test('return noMachineInfo if we are not able to retrieve any info about the podman connection', async () => {
const manager = new PodmanConnection(rpcExtensionMock);
vi.mocked(env).isLinux = false;
vi.mocked(containerEngine.listInfos).mockResolvedValue([]);
const result = await manager.checkContainerConnectionStatusAndResources({
model: modelMock,
context: 'inference',
});
expect(result).toStrictEqual({
status: 'no-machine',
canRedirect: expect.any(Boolean),
});
});
test('return lowResourceMachineInfo if the podman connection has not enough cpus', async () => {
const manager = new PodmanConnection(rpcExtensionMock);
vi.mocked(env).isLinux = false;
vi.mocked(provider.getContainerConnections).mockReturnValue([
{
connection: {
type: 'podman',
status: (): ProviderConnectionStatus => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
},
]);
vi.mocked(containerEngine.listInfos).mockResolvedValue([
{
engineId: 'engineId',
engineName: 'enginerName',
engineType: 'podman',
cpus: 3,
memory: 20,
memoryUsed: 0,
},
]);
manager.init();
const result = await manager.checkContainerConnectionStatusAndResources({
model: modelMock,
context: 'inference',
});
expect(result).toStrictEqual({
status: 'low-resources',
canRedirect: expect.any(Boolean),
name: 'Podman Machine',
canEdit: false,
cpus: 3,
memoryIdle: 20,
cpusExpected: 4,
memoryExpected: 11,
});
});
test('return runningMachineInfo if the podman connection has enough resources', async () => {
const manager = new PodmanConnection(rpcExtensionMock);
vi.mocked(env).isLinux = false;
vi.mocked(provider.getContainerConnections).mockReturnValue([
{
connection: {
type: 'podman',
status: (): ProviderConnectionStatus => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
},
]);
vi.mocked(containerEngine.listInfos).mockResolvedValue([
{
engineId: 'engineId',
engineName: 'enginerName',
engineType: 'podman',
cpus: 12,
memory: 20,
memoryUsed: 0,
},
]);
manager.init();
const result = await manager.checkContainerConnectionStatusAndResources({
model: modelMock,
context: 'inference',
});
expect(result).toStrictEqual({
name: 'Podman Machine',
status: 'running',
canRedirect: expect.any(Boolean),
});
});
});
describe('getConnectionByEngineId', () => {
test('no provider should raise an error', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([]);
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found');
expect(containerEngine.listInfos).not.toHaveBeenCalled();
});
test('empty listInfos response should raise an error', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([
{
connection: {
type: 'podman',
status: (): ProviderConnectionStatus => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
},
]);
vi.mocked(containerEngine.listInfos).mockResolvedValue([]);
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found');
expect(containerEngine.listInfos).toHaveBeenCalled();
});
test('invalid engineId should raise an error', async () => {
vi.mocked(provider.getContainerConnections).mockReturnValue([
{
connection: {
type: 'podman',
status: (): ProviderConnectionStatus => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
},
providerId: 'podman',
},
]);
vi.mocked(containerEngine.listInfos).mockResolvedValue([
{
engineId: 'engineId',
engineName: 'enginerName',
engineType: 'podman',
cpus: 12,
memory: 20,
memoryUsed: 0,
},
]);
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
await expect(() => manager.getConnectionByEngineId('fake engine')).rejects.toThrowError('connection not found');
expect(containerEngine.listInfos).toHaveBeenCalled();
});
test('valid engineId should return matching connection', async () => {
const connectionMock: ContainerProviderConnection = {
type: 'podman',
status: () => 'started',
name: 'Podman Machine',
endpoint: {
socketPath: './socket-path',
},
};
vi.mocked(provider.getContainerConnections).mockReturnValue([
{
connection: connectionMock,
providerId: 'podman',
},
]);
vi.mocked(containerEngine.listInfos).mockResolvedValue([
{
engineId: 'engineId',
engineName: 'enginerName',
engineType: 'podman',
cpus: 12,
memory: 20,
memoryUsed: 0,
},
]);
const manager = new PodmanConnection(rpcExtensionMock);
manager.init();
const connection = await manager.getConnectionByEngineId('engineId');
expect(containerEngine.listInfos).toHaveBeenCalled();
expect(connection).toBe(connectionMock);
});
});
================================================
FILE: packages/backend/src/managers/podmanConnection.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type {
ContainerProviderConnection,
Disposable,
Event,
RegisterContainerConnectionEvent,
UpdateContainerConnectionEvent,
RunResult,
RunOptions,
ProviderContainerConnection,
} from '@podman-desktop/api';
import { containerEngine, env, navigation, EventEmitter, process, provider, extensions } from '@podman-desktop/api';
import { getPodmanMachineName, type MachineJSON, MIN_CPUS_VALUE, getPodmanCli } from '../utils/podman';
import { VMType } from '@shared/models/IPodman';
import { Publisher } from '../utils/Publisher';
import type {
CheckContainerConnectionResourcesOptions,
ContainerConnectionInfo,
ContainerProviderConnectionInfo,
} from '@shared/models/IContainerConnectionInfo';
import { MSG_PODMAN_CONNECTION_UPDATE } from '@shared/Messages';
import type { RpcExtension } from '@shared/messages/MessageProxy';
export interface PodmanConnectionEvent {
status: 'stopped' | 'started' | 'unregister' | 'register';
}
export interface PodmanRunOptions extends RunOptions {
connection?: ProviderContainerConnection;
}
export class PodmanConnection extends Publisher<ContainerProviderConnectionInfo[]> implements Disposable {
// Map of providerId with corresponding connections
#providers: Map<string, ContainerProviderConnection[]>;
#disposables: Disposable[];
private readonly _onPodmanConnectionEvent = new EventEmitter<PodmanConnectionEvent>();
readonly onPodmanConnectionEvent: Event<PodmanConnectionEvent> = this._onPodmanConnectionEvent.event;
constructor(rpcExtension: RpcExtension) {
super(rpcExtension, MSG_PODMAN_CONNECTION_UPDATE, () => this.getContainerProviderConnectionInfo());
this.#providers = new Map();
this.#disposables = [];
}
/**
* Execute the podman cli with the arguments provided
*
* @example
* ```
* const result = await podman.execute(connection, ['machine', 'ls', '--format=json']);
* ```
* @param connection
* @param args
* @param options
*/
execute(connection: ContainerProviderConnection, args: string[], options?: RunOptions): Promise<RunResult> {
const podman = extensions.getExtension('podman-desktop.podman');
if (!podman) {
console.warn('cannot find podman extension api');
return this.executeLegacy(args, options);
}
const podmanApi: {
exec(args: string[], options?: PodmanRunOptions): Promise<RunResult>;
} = podman.exports;
return podmanApi.exec(args, {
...options,
connection: this.getProviderContainerConnection(connection),
});
}
/**
* Execute a command inside the podman machine
*
* @example
* ```
* const result = await podman.executeSSH(connection, ['ls', '/dev']);
* ```
* @param connection
* @param args
* @param options
*/
executeSSH(connection: ContainerProviderConnection, args: string[], options?: RunOptions): Promise<RunResult> {
return this.execute(connection, ['machine', 'ssh', this.getNameLegacyCompatibility(connection), ...args], options);
}
/**
* Before 1.13, the podman extension did not expose any api.
*
* Therefore, to support older versions we need to get the podman executable ourselves.
* @deprecated
*/
protected executeLegacy(args: string[], options?: RunOptions): Promise<RunResult> {
return process.exec(getPodmanCli(), [...args], options);
}
/**
* Before 1.13, the {@link ContainerProviderConnection.name} field was also used as the
* user-friendly display name.
*
* Therefore, the name could be `Podman Machine Default` while the real machine name was `podman-machine-default`.
* @param connection
* @deprecated
*/
protected getNameLegacyCompatibility(connection: ContainerProviderConnection): string {
return getPodmanMachineName(connection);
}
getContainerProviderConnections(): ContainerProviderConnection[] {
return Array.from(this.#providers.values()).flat();
}
/**
* This method flattens the providers map into a list of serializable connection info objects.
*/
getContainerProviderConnectionInfo(): ContainerProviderConnectionInfo[] {
const output: ContainerProviderConnectionInfo[] = [];
for (const [providerId, connections] of Array.from(this.#providers.entries())) {
output.push(
...connections.map(
(connection): ContainerProviderConnectionInfo => ({
providerId: providerId,
name: connection.name,
vmType: this.parseVMType(connection.vmType),
type: 'podman',
status: connection.status(),
}),
),
);
}
return output;
}
init(): void {
// setup listeners
this.listen();
this.refreshProviders();
}
dispose(): void {
this.#disposables.forEach(disposable => disposable.dispose());
}
/**
* This method allows us to get the ProviderContainerConnection given a ContainerProviderConnection
* @param connection
* @protected
*/
protected getProviderContainerConnection(connection: ContainerProviderConnection): ProviderContainerConnection {
const providers: ProviderContainerConnection[] = provider.getContainerConnections();
const podmanProvider = providers
.filter(({ connection }) => connection.type === 'podman')
.find(provider => provider.connection.name === connection.name);
if (!podmanProvider) throw new Error(`cannot find podman provider with connection name ${connection.name}`);
return podmanProvider;
}
protected refreshProviders(): void {
// clear all providers
this.#providers.clear();
const providers: ProviderContainerConnection[] = provider.getContainerConnections();
// register the podman container connection
providers
.filter(({ connection }) => connection.type === 'podman')
.forEach(({ providerId, connection }) => {
this.#providers.set(providerId, [connection, ...(this.#providers.get(providerId) ?? [])]);
});
// notify
this.notify();
}
private listen(): void {
// capture unregister event
this.#disposables.push(
provider.onDidUnregisterContainerConnection(() => {
this.refreshProviders();
this._onPodmanConnectionEvent.fire({
status: 'unregister',
});
}),
);
this.#disposables.push(
provider.onDidRegisterContainerConnection(({ providerId, connection }: RegisterContainerConnectionEvent) => {
if (connection.type !== 'podman') {
return;
}
// update connection
this.#providers.set(providerId, [connection, ...(this.#providers.get(providerId) ?? [])]);
this.notify();
this._onPodmanConnectionEvent.fire({
status: 'register',
});
}),
);
this.#disposables.push(
provider.onDidUpdateContainerConnection(({ status }: UpdateContainerConnectionEvent) => {
switch (status) {
case 'started':
case 'stopped':
this._onPodmanConnectionEvent.fire({
status: status,
});
this.notify();
break;
default:
break;
}
}),
);
this.#disposables.push(
provider.onDidUpdateProvider(() => {
this.refreshProviders();
}),
);
}
protected parseVMType(vmtype: string | undefined): VMType {
if (!vmtype) return VMType.UNKNOWN;
const type = Object.values(VMType).find(s => s === vmtype);
if (type === undefined) {
return VMType.UNKNOWN;
}
return type;
}
/**
* Get the VMType of the podman machine
* @param name the machine name, from {@link ContainerProviderConnection}
* @deprecated use `getContainerProviderConnectionInfo()` instead
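* @example
* ```
* // illustrative usage, the machine name comes from the container provider connection
* const type = await podmanConnection.getVMType('podman-machine-default');
* ```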
*/
async getVMType(name?: string): Promise<VMType> {
const { stdout } = await process.exec(getPodmanCli(), ['machine', 'list', '--format', 'json']);
const parsed: unknown = JSON.parse(stdout);
if (!Array.isArray(parsed)) throw new Error('podman machine list provided a malformed response');
if (parsed.length === 0 && name) throw new Error('podman machine list provided an empty array');
// On Linux we might not have any machine
if (parsed.length === 0) return VMType.UNKNOWN;
if (parsed.length > 1 && !name)
throw new Error('name need to be provided when more than one podman machine is configured.');
let output: MachineJSON;
if (name) {
output = parsed.find(machine => typeof machine === 'object' && 'Name' in machine && machine.Name === name);
if (!output) throw new Error(`cannot find matching podman machine with name ${name}`);
} else {
output = parsed[0];
}
return this.parseVMType(output.VMType);
}
getContainerProviderConnection(connection: ContainerProviderConnectionInfo): ContainerProviderConnection {
const output = (this.#providers.get(connection.providerId) ?? []).find(
mConnection => connection.name === mConnection.name,
);
if (!output) throw new Error(`no container provider connection found for connection name ${connection.name}`);
return output;
}
findRunningContainerProviderConnection(): ContainerProviderConnection | undefined {
for (const connections of Array.from(this.#providers.values())) {
const result = connections.find(connection => connection.status() === 'started');
if (result) return result;
}
return undefined;
}
/**
* This method returns the ContainerProviderConnection corresponding to an engineId
* @param engineId
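* @example
* ```
* // illustrative usage, the engineId comes from containerEngine.listInfos()
* const connection = await podmanConnection.getConnectionByEngineId('engine-id');
* ```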
*/
async getConnectionByEngineId(engineId: string): Promise<ContainerProviderConnection> {
const connections = Array.from(this.#providers.values()).flat();
for (const connection of connections) {
const infos = await containerEngine.listInfos({ provider: connection });
if (infos.length === 0) continue;
if (infos[0].engineId === engineId) return connection;
}
throw new Error('connection not found');
}
async checkContainerConnectionStatusAndResources(
options: CheckContainerConnectionResourcesOptions,
): Promise<ContainerConnectionInfo> {
// Starting from Podman Desktop 1.10 the navigation functions are available
const hasNavigateFunction = !!navigation.navigateToResources;
// If no connection is specified and we are on Linux, assume native usage
if (env.isLinux && !options.connection) {
return {
status: 'native',
canRedirect: hasNavigateFunction,
};
}
let connection: ContainerProviderConnection | undefined = undefined;
if (options.connection) {
connection = this.getContainerProviderConnection(options.connection);
} else {
connection = this.findRunningContainerProviderConnection();
}
if (!connection) {
return {
status: 'no-machine',
canRedirect: hasNavigateFunction,
};
}
const engineInfos = await containerEngine.listInfos({
provider: connection,
});
if (engineInfos.length === 0) {
return {
status: 'no-machine',
canRedirect: hasNavigateFunction,
};
}
const engineInfo = engineInfos[0];
if (!engineInfo) {
return {
status: 'no-machine',
canRedirect: hasNavigateFunction,
};
}
const hasCpus = engineInfo.cpus !== undefined && engineInfo.cpus >= MIN_CPUS_VALUE;
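// The expected memory is the model size plus a safety margin: 25% for recipes, 10% for inference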
const multiplier = options.context === 'recipe' ? 1.25 : 1.1;
const memoryExpected = options.model.memory * multiplier;
let hasMemory: boolean = true;
if (engineInfo.memory !== undefined && engineInfo.memoryUsed !== undefined) {
hasMemory = engineInfo.memory - engineInfo.memoryUsed >= memoryExpected;
}
let memoryIdle: number = 0;
if (engineInfo.memory !== undefined && engineInfo.memoryUsed !== undefined) {
memoryIdle = engineInfo.memory - engineInfo.memoryUsed;
}
if (!hasCpus || !hasMemory) {
return {
name: connection.name,
cpus: engineInfo.cpus ?? 0,
memoryIdle: memoryIdle,
cpusExpected: MIN_CPUS_VALUE,
memoryExpected: memoryExpected,
status: 'low-resources',
canEdit: !!connection.lifecycle?.edit,
canRedirect: hasNavigateFunction,
};
}
return {
name: connection.name,
status: 'running',
canRedirect: hasNavigateFunction,
};
}
}
================================================
FILE: packages/backend/src/managers/recipes/BuilderManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { Recipe } from '@shared/models/IRecipe';
import type { ContainerConfig } from '../../models/AIConfig';
import fs from 'node:fs';
import { BuilderManager } from './BuilderManager';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { ContainerProviderConnection, ImageInfo } from '@podman-desktop/api';
import { containerEngine } from '@podman-desktop/api';
import { VMType } from '@shared/models/IPodman';
const taskRegistry = {
getTask: vi.fn(),
createTask: vi.fn(),
updateTask: vi.fn(),
delete: vi.fn(),
deleteAll: vi.fn(),
getTasks: vi.fn(),
getTasksByLabels: vi.fn(),
deleteByLabels: vi.fn(),
} as unknown as TaskRegistry;
vi.mock('@podman-desktop/api', () => ({
containerEngine: {
buildImage: vi.fn(),
listImages: vi.fn(),
},
}));
const connectionMock: ContainerProviderConnection = {
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
} as unknown as ContainerProviderConnection;
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(taskRegistry.createTask).mockImplementation((name, state, labels) => ({
id: 'random',
name: name,
state: state,
labels: labels ?? {},
error: undefined,
}));
});
describe('buildImages', () => {
const recipe = {
id: 'recipe1',
} as Recipe;
const containers: ContainerConfig[] = [
{
name: 'container1',
contextdir: 'contextdir1',
containerfile: 'Containerfile',
arch: ['amd64'],
modelService: false,
gpu_env: [],
ports: [8080],
},
];
const manager = new BuilderManager(taskRegistry);
test('setTaskState should be called with error if context does not exist', async () => {
vi.spyOn(fs, 'existsSync').mockReturnValue(false);
vi.mocked(containerEngine.listImages).mockRejectedValue([]);
await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
'Context configured does not exist.',
);
});
test('setTaskState should be called with error if buildImage execution fails', async () => {
vi.spyOn(fs, 'existsSync').mockReturnValue(true);
vi.mocked(containerEngine.buildImage).mockRejectedValue('error');
vi.mocked(containerEngine.listImages).mockRejectedValue([]);
await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
'Something went wrong while building the image: error',
);
expect(taskRegistry.updateTask).toBeCalledWith({
error: 'Something went wrong while building the image: error',
name: 'Building container1',
id: expect.any(String),
state: expect.any(String),
labels: {},
});
});
test('setTaskState should be called with error if unable to find the image after built', async () => {
vi.spyOn(fs, 'existsSync').mockReturnValue(true);
vi.mocked(containerEngine.buildImage).mockResolvedValue({});
vi.mocked(containerEngine.listImages).mockResolvedValue([]);
await expect(manager.build(connectionMock, recipe, containers, 'config')).rejects.toThrow(
'no image found for container1:latest',
);
expect(taskRegistry.updateTask).toBeCalledWith({
error: 'no image found for container1:latest',
name: 'Building container1',
id: expect.any(String),
state: expect.any(String),
labels: {},
});
});
test('succeed if building the image does not fail', async () => {
vi.spyOn(fs, 'existsSync').mockReturnValue(true);
vi.mocked(containerEngine.buildImage).mockResolvedValue({});
vi.mocked(containerEngine.listImages).mockResolvedValue([
{
RepoTags: ['recipe1-container1:latest'],
engineId: 'engine',
Id: 'id1',
} as unknown as ImageInfo,
]);
const imageInfoList = await manager.build(connectionMock, recipe, containers, 'config');
expect(taskRegistry.updateTask).toBeCalledWith({
name: 'Building container1',
id: expect.any(String),
state: 'success',
labels: {},
});
expect(imageInfoList.length).toBe(1);
expect(imageInfoList[0].ports.length).toBe(1);
expect(imageInfoList[0].ports[0]).equals('8080');
expect(containerEngine.buildImage).toHaveBeenCalledWith(
'contextdir1',
expect.any(Function),
expect.objectContaining({
provider: connectionMock,
}),
);
});
});
================================================
FILE: packages/backend/src/managers/recipes/BuilderManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import {
type BuildImageOptions,
type Disposable,
containerEngine,
type ContainerProviderConnection,
} from '@podman-desktop/api';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { RecipeImage, Recipe } from '@shared/models/IRecipe';
import type { ContainerConfig } from '../../models/AIConfig';
import type { Task } from '@shared/models/ITask';
import path from 'node:path';
import { getParentDirectory } from '../../utils/pathUtils';
import fs from 'node:fs';
import { getImageTag } from '../../utils/imagesUtils';
import {
IMAGE_LABEL_APP_PORTS,
IMAGE_LABEL_APPLICATION_NAME,
IMAGE_LABEL_MODEL_SERVICE,
IMAGE_LABEL_RECIPE_ID,
} from '../../utils/RecipeConstants';
export class BuilderManager implements Disposable {
private controller: Map<string, AbortController> = new Map();
constructor(private taskRegistry: TaskRegistry) {}
/**
* On dispose, the builder will abort all current build.
*/
dispose(): void {
// eslint-disable-next-line sonarjs/array-callback-without-return
Array.from(this.controller.values()).every(controller => controller.abort('disposing builder manager'));
}
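/**
* Build the images for every container of the recipe, tracking each build with a task.
* Only one build per recipe is allowed at a time; starting a new build aborts the previous one.
* @param connection the container provider connection used to build the images
* @param recipe the recipe owning the containers
* @param containers the containers to build
* @param configPath the path of the recipe configuration file; its parent directory is used as the build root
* @param labels labels to propagate to the created tasks and images
* @returns the list of built recipe images
*/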
async build(
connection: ContainerProviderConnection,
recipe: Recipe,
containers: ContainerConfig[],
configPath: string,
labels: { [key: string]: string } = {},
): Promise<RecipeImage[]> {
const containerTasks: { [key: string]: Task } = Object.fromEntries(
containers.map(container => [
container.name,
this.taskRegistry.createTask(`Building ${container.name}`, 'loading', labels),
]),
);
const imageInfoList: RecipeImage[] = [];
// Build all the images in parallel with Promise.all
const abortController = new AbortController();
// only one build per recipe is supported
if (this.controller.has(recipe.id)) {
this.controller.get(recipe.id)?.abort('multiple build not supported.');
}
this.controller.set(recipe.id, abortController);
try {
await Promise.all(
containers.map(container => {
const task = containerTasks[container.name];
// We use the parent directory of our configFile as the rootdir, then we append the contextDir provided
const context = path.join(getParentDirectory(configPath), container.contextdir);
console.log(`Application Manager using context ${context} for container ${container.name}`);
// Ensure the provided context exists, otherwise throw an Error
if (!fs.existsSync(context)) {
task.error = 'The context provided does not exist.';
this.taskRegistry.updateTask(task);
throw new Error('Context configured does not exist.');
}
const imageTag = getImageTag(recipe, container);
const buildOptions: BuildImageOptions = {
provider: connection,
containerFile: container.containerfile,
tag: imageTag,
labels: {
...labels,
[IMAGE_LABEL_RECIPE_ID]: recipe.id,
[IMAGE_LABEL_MODEL_SERVICE]: container.modelService ? 'true' : 'false',
[IMAGE_LABEL_APPLICATION_NAME]: container.name,
[IMAGE_LABEL_APP_PORTS]: (container.ports ?? []).join(','),
},
abortController: abortController,
};
let error = false;
return containerEngine
.buildImage(
context,
(event, data) => {
// todo: do something with the event
if (event === 'error' || (event === 'finish' && data !== '')) {
console.error('Something went wrong while building the image: ', data);
task.error = `Something went wrong while building the image: ${data}`;
this.taskRegistry.updateTask(task);
error = true;
}
},
buildOptions,
)
.catch((err: unknown) => {
task.error = `Something went wrong while building the image: ${String(err)}`;
this.taskRegistry.updateTask(task);
throw new Error(`Something went wrong while building the image: ${String(err)}`);
})
.then(() => {
if (error) {
throw new Error(`Something went wrong while building the image: ${imageTag}`);
}
});
}),
);
} catch (err: unknown) {
abortController.abort();
throw err;
} finally {
// remove abort controller
this.controller.delete(recipe.id);
}
// After the images are built, return their data
const images = await containerEngine.listImages({ provider: connection });
await Promise.all(
containers.map(async container => {
const task = containerTasks[container.name];
const imageTag = getImageTag(recipe, container);
const image = images.find(im => {
return im.RepoTags?.some(tag => tag.endsWith(imageTag));
});
if (!image) {
task.error = `no image found for ${container.name}:latest`;
this.taskRegistry.updateTask(task);
throw new Error(`no image found for ${container.name}:latest`);
}
let imageName: string | undefined = undefined;
if (image.RepoTags && image.RepoTags.length > 0) {
imageName = image.RepoTags[0];
}
imageInfoList.push({
id: image.Id,
engineId: image.engineId,
name: imageName,
modelService: container.modelService,
ports: container.ports?.map(p => `${p}`) ?? [],
appName: container.name,
recipeId: recipe.id,
});
task.state = 'success';
this.taskRegistry.updateTask(task);
}),
);
return imageInfoList;
}
}
================================================
FILE: packages/backend/src/managers/recipes/PodManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, vi, expect, test } from 'vitest';
import { PodManager } from './PodManager';
import type { ContainerInspectInfo, ContainerJSONEvent, PodCreateOptions, PodInfo } from '@podman-desktop/api';
import { EventEmitter, containerEngine } from '@podman-desktop/api';
vi.mock('@podman-desktop/api', () => ({
containerEngine: {
listPods: vi.fn(),
stopPod: vi.fn(),
removePod: vi.fn(),
startPod: vi.fn(),
createPod: vi.fn(),
inspectContainer: vi.fn(),
onEvent: vi.fn(),
},
EventEmitter: vi.fn(),
}));
beforeEach(() => {
vi.resetAllMocks();
// we return the id as health status
vi.mocked(containerEngine.inspectContainer).mockImplementation(async (engineId: string, id: string) => {
return {
State: {
Health: {
Status: id,
},
},
} as unknown as ContainerInspectInfo;
});
// mocking the EventEmitter mechanism
const listeners: ((value: unknown) => void)[] = [];
vi.mocked(EventEmitter).mockReturnValue({
event: vi.fn().mockImplementation(callback => {
listeners.push(callback);
}),
fire: vi.fn().mockImplementation((content: unknown) => {
listeners.forEach(listener => listener(content));
}),
} as unknown as EventEmitter<unknown>);
});
test('getAllPods should use container engine list pods method', async () => {
await new PodManager().getAllPods();
expect(containerEngine.listPods).toHaveBeenCalledOnce();
});
test('findPodByLabelsValues should only return pods with labels matching values', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([
{
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-invalid',
hello: 'eggs',
},
},
{
Id: 'pod-id-2',
Labels: {
hello: 'world',
'dummy-key': 'dummy-valid',
},
},
{
Id: 'pod-id-2',
Labels: {
hello: 'world',
'dummy-key': 'invalid',
},
},
{
Id: 'pod-id-3',
},
] as unknown as PodInfo[]);
const pod = await new PodManager().findPodByLabelsValues({
'dummy-key': 'dummy-valid',
hello: 'world',
});
expect(pod).toBeDefined();
expect(pod?.Id).toBe('pod-id-2');
});
test('getPodsWithLabels should only return pods with proper labels', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([
{
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
},
{
Id: 'pod-id-2',
Labels: {
hello: 'world',
'dummy-key': 'dummy-value',
},
},
{
Id: 'pod-id-3',
},
] as unknown as PodInfo[]);
const pods = await new PodManager().getPodsWithLabels(['dummy-key']);
expect(pods.length).toBe(2);
expect(pods.find(pod => pod.Id === 'pod-id-1')).toBeDefined();
expect(pods.find(pod => pod.Id === 'pod-id-2')).toBeDefined();
expect(pods.find(pod => pod.Id === 'pod-id-3')).toBeUndefined();
});
describe('getHealth', () => {
test('getHealth with no container should be none', async () => {
const health = await new PodManager().getHealth({
Containers: [],
} as unknown as PodInfo);
expect(health).toBe('none');
});
test('getHealth with one healthy should be healthy', async () => {
const health = await new PodManager().getHealth({
Containers: [
{
Id: 'healthy',
},
],
} as unknown as PodInfo);
expect(health).toBe('healthy');
});
test('getHealth with many healthy and one unhealthy should be unhealthy', async () => {
const health = await new PodManager().getHealth({
Containers: [
{
Id: 'healthy',
},
{
Id: 'unhealthy',
},
{
Id: 'healthy',
},
{
Id: 'starting',
},
],
} as unknown as PodInfo);
expect(health).toBe('unhealthy');
});
test('getHealth with many healthy and one starting should be starting', async () => {
const health = await new PodManager().getHealth({
Containers: [
{
Id: 'healthy',
},
{
Id: 'healthy',
},
{
Id: 'starting',
},
],
} as unknown as PodInfo);
expect(health).toBe('starting');
});
});
describe('getPod', () => {
test('getPod should throw an error if none is matching', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([]);
await expect(async () => {
await new PodManager().getPod('fakeEngineId', 'fakePodId');
}).rejects.toThrowError('pod with engineId fakeEngineId and Id fakePodId cannot be found.');
});
test('getPod should return matching pod', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([
{
engineId: 'engine-1',
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
},
{
engineId: 'engine-2',
Id: 'pod-id-2',
Labels: {
hello: 'world',
'dummy-key': 'dummy-value',
},
},
{
engineId: 'engine-3',
Id: 'pod-id-3',
},
] as unknown as PodInfo[]);
const pod = await new PodManager().getPod('engine-3', 'pod-id-3');
expect(pod).toBeDefined();
expect(pod.engineId).toBe('engine-3');
expect(pod.Id).toBe('pod-id-3');
});
});
test('stopPod should call containerEngine.stopPod', async () => {
await new PodManager().stopPod('dummy-engine-id', 'dummy-pod-id');
expect(containerEngine.stopPod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});
test('removePod should call containerEngine.removePod', async () => {
await new PodManager().removePod('dummy-engine-id', 'dummy-pod-id');
expect(containerEngine.removePod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});
test('startPod should call containerEngine.startPod', async () => {
await new PodManager().startPod('dummy-engine-id', 'dummy-pod-id');
expect(containerEngine.startPod).toHaveBeenCalledWith('dummy-engine-id', 'dummy-pod-id');
});
test('createPod should call containerEngine.createPod', async () => {
const options: PodCreateOptions = {
name: 'dummy-name',
portmappings: [],
};
await new PodManager().createPod(options);
expect(containerEngine.createPod).toHaveBeenCalledWith(options);
});
test('dispose should dispose onEvent disposable', () => {
const disposableMock = vi.fn();
vi.mocked(containerEngine.onEvent).mockImplementation(() => {
return { dispose: disposableMock };
});
const podManager = new PodManager();
podManager.init();
podManager.dispose();
expect(containerEngine.onEvent).toHaveBeenCalled();
expect(disposableMock).toHaveBeenCalled();
});
const getInitializedPodManager = (): {
onEventListener: (e: ContainerJSONEvent) => unknown;
podManager: PodManager;
} => {
let func: ((e: ContainerJSONEvent) => unknown) | undefined = undefined;
vi.mocked(containerEngine.onEvent).mockImplementation(fn => {
func = fn;
return { dispose: vi.fn() };
});
const podManager = new PodManager();
podManager.init();
if (!func) throw new Error('listener should be defined');
return { onEventListener: func, podManager };
};
describe('events', () => {
test('onStartPodEvent listener should be called on start pod event', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([
{
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
},
] as unknown as PodInfo[]);
const { onEventListener, podManager } = getInitializedPodManager();
const startListenerMock = vi.fn();
podManager.onStartPodEvent(startListenerMock);
onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'start' });
await vi.waitFor(() => {
expect(startListenerMock).toHaveBeenCalledWith({
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
});
});
});
test('onStopPodEvent listener should be called on stop pod event', async () => {
vi.mocked(containerEngine.listPods).mockResolvedValue([
{
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
},
] as unknown as PodInfo[]);
const { onEventListener, podManager } = getInitializedPodManager();
const stopListenerMock = vi.fn();
podManager.onStopPodEvent(stopListenerMock);
onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'stop' });
await vi.waitFor(() => {
expect(stopListenerMock).toHaveBeenCalledWith({
Id: 'pod-id-1',
Labels: {
'dummy-key': 'dummy-value',
hello: 'world',
},
});
});
});
test('onRemovePodEvent listener should be called on remove pod event', async () => {
const { onEventListener, podManager } = getInitializedPodManager();
const removeListenerMock = vi.fn();
podManager.onRemovePodEvent(removeListenerMock);
onEventListener({ id: 'pod-id-1', Type: 'pod', type: '', status: 'remove' });
await vi.waitFor(() => {
expect(removeListenerMock).toHaveBeenCalledWith({
podId: 'pod-id-1',
});
});
expect(containerEngine.listPods).not.toHaveBeenCalled();
});
});
================================================
FILE: packages/backend/src/managers/recipes/PodManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { Disposable, PodCreateOptions, PodInfo, Event } from '@podman-desktop/api';
import { containerEngine, EventEmitter } from '@podman-desktop/api';
import type { PodHealth } from '@shared/models/IApplicationState';
import { getPodHealth } from '../../utils/podsUtils';
export interface PodEvent {
podId: string;
}
export class PodManager implements Disposable {
#eventDisposable: Disposable | undefined;
// start pod events
private readonly _onStartPodEvent = new EventEmitter<PodInfo>();
readonly onStartPodEvent: Event<PodInfo> = this._onStartPodEvent.event;
// stop pod events
private readonly _onStopPodEvent = new EventEmitter<PodInfo>();
readonly onStopPodEvent: Event<PodInfo> = this._onStopPodEvent.event;
// remove pod events
private readonly _onRemovePodEvent = new EventEmitter<PodEvent>();
readonly onRemovePodEvent: Event<PodEvent> = this._onRemovePodEvent.event;
dispose(): void {
this.#eventDisposable?.dispose();
}
init(): void {
this.#eventDisposable = containerEngine.onEvent(async event => {
// filter on pod event type
if (event.Type !== 'pod') {
return;
}
if (event.status === 'remove') {
return this._onRemovePodEvent.fire({
podId: event.id,
});
}
const pod: PodInfo = await this.getPodById(event.id);
switch (event.status) {
case 'start':
this._onStartPodEvent.fire(pod);
break;
case 'stop':
this._onStopPodEvent.fire(pod);
break;
}
});
}
/**
* Utility method to get all the pods
*/
getAllPods(): Promise<PodInfo[]> {
return containerEngine.listPods();
}
/**
* return the first pod whose labels match all the provided key/value pairs
* @param requestedLabels the labels and values the pod must match
*/
async findPodByLabelsValues(requestedLabels: Record<string, string>): Promise<PodInfo | undefined> {
const pods = await this.getAllPods();
return pods.find(pod => {
const labels = pod.Labels;
// eslint-disable-next-line sonarjs/different-types-comparison
if (labels === undefined) return false;
for (const [key, value] of Object.entries(requestedLabels)) {
if (!(key in labels) || labels[key] !== value) return false;
}
return true;
});
}
/**
* return the pods containing all the provided labels
* This method only checks for the existence of the labels, not their values
* @param labels
*/
async getPodsWithLabels(labels: string[]): Promise<PodInfo[]> {
const pods = await this.getAllPods();
return pods.filter(pod => labels.every(label => !!pod.Labels && label in pod.Labels));
}
/**
* Given a PodInfo, fetch the health status of each container composing it and
* return the aggregated PodHealth
* @param pod the pod to inspect
*/
async getHealth(pod: PodInfo): Promise<PodHealth> {
const containerStates: (string | undefined)[] = await Promise.all(
pod.Containers.map(container =>
containerEngine.inspectContainer(pod.engineId, container.Id).then(data => data.State.Health?.Status),
),
);
return getPodHealth(containerStates);
}
/**
* This method is private as we do not want to expose methods that do not take an engineId,
* but it is needed here because pod events do not provide the engineId
* @param id
* @private
*/
private async getPodById(id: string): Promise<PodInfo> {
const pods = await this.getAllPods();
const result = pods.find(pod => pod.Id === id);
if (!result) throw new Error(`pod with Id ${id} cannot be found.`);
return result;
}
async getPod(engineId: string, Id: string): Promise<PodInfo> {
const pods = await this.getAllPods();
const result = pods.find(pod => pod.engineId === engineId && pod.Id === Id);
if (!result) throw new Error(`pod with engineId ${engineId} and Id ${Id} cannot be found.`);
return result;
}
async stopPod(engineId: string, id: string): Promise<void> {
return containerEngine.stopPod(engineId, id);
}
async removePod(engineId: string, id: string): Promise<void> {
return containerEngine.removePod(engineId, id);
}
async startPod(engineId: string, id: string): Promise<void> {
return containerEngine.startPod(engineId, id);
}
async createPod(podOptions: PodCreateOptions): Promise<{ engineId: string; Id: string }> {
return containerEngine.createPod(podOptions);
}
}
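// Illustrative usage sketch (not part of the PodManager API surface): how a caller
// could combine findPodByLabelsValues and getHealth to report the aggregated health
// of a recipe pod. The 'ai-lab-recipe-id' label key is a hypothetical example.
async function getRecipePodHealthSketch(
podManager: PodManager,
recipeId: string,
): Promise<PodHealth | undefined> {
const pod = await podManager.findPodByLabelsValues({ 'ai-lab-recipe-id': recipeId });
if (!pod) return undefined;
return podManager.getHealth(pod);
}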
================================================
FILE: packages/backend/src/managers/recipes/RecipeManager.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, describe, expect, test, vi } from 'vitest';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { BuilderManager } from './BuilderManager';
import type { GitManager } from '../gitManager';
import type { LocalRepositoryRegistry } from '../../registries/LocalRepositoryRegistry';
import { RecipeManager } from './RecipeManager';
import { containerEngine, type ContainerProviderConnection } from '@podman-desktop/api';
import type { Recipe } from '@shared/models/IRecipe';
import type { Stats } from 'node:fs';
import { existsSync, statSync } from 'node:fs';
import { AIConfigFormat, parseYamlFile } from '../../models/AIConfig';
import { goarch } from '../../utils/arch';
import { VMType } from '@shared/models/IPodman';
import type { InferenceManager } from '../inference/inferenceManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { ApplicationOptions } from '../../models/ApplicationOptions';
const taskRegistryMock = {
createTask: vi.fn(),
updateTask: vi.fn(),
} as unknown as TaskRegistry;
const builderManagerMock = {
build: vi.fn(),
} as unknown as BuilderManager;
const gitManagerMock = {
processCheckout: vi.fn(),
} as unknown as GitManager;
const localRepositoriesMock = {
register: vi.fn(),
} as unknown as LocalRepositoryRegistry;
const inferenceManagerMock = {} as unknown as InferenceManager;
const recipeMock: Recipe = {
id: 'recipe-test',
name: 'Test Recipe',
categories: [],
description: 'test recipe description',
repository: 'http://test-repository.test',
readme: 'test recipe readme',
};
const connectionMock: ContainerProviderConnection = {
name: 'Podman Machine',
vmType: VMType.UNKNOWN,
} as unknown as ContainerProviderConnection;
const modelInfoMock: ModelInfo = {
id: 'modelId',
name: 'Model',
description: 'model to test',
} as unknown as ModelInfo;
vi.mock('../../models/AIConfig', () => ({
AIConfigFormat: {
CURRENT: 'current',
},
parseYamlFile: vi.fn(),
}));
vi.mock('node:fs', () => ({
existsSync: vi.fn(),
statSync: vi.fn(),
}));
vi.mock('@podman-desktop/api', () => ({
containerEngine: {
listImages: vi.fn(),
},
}));
vi.mock('../../utils/arch', () => ({
goarch: vi.fn(),
}));
beforeEach(() => {
vi.resetAllMocks();
vi.mocked(containerEngine.listImages).mockResolvedValue([]);
vi.mocked(taskRegistryMock.createTask).mockImplementation((name, state, labels) => ({
name,
state,
labels,
id: 'fake-task',
}));
vi.mocked(existsSync).mockReturnValue(true);
vi.mocked(statSync).mockReturnValue({
isDirectory: () => true,
} as unknown as Stats);
vi.mocked(parseYamlFile).mockReturnValue({
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
arch: ['dummy-arch'],
modelService: false,
name: 'test-container',
gpu_env: [],
contextdir: '.',
},
],
},
});
vi.mocked(goarch).mockReturnValue('dummy-arch');
});
async function getInitializedRecipeManager(): Promise<RecipeManager> {
const manager = new RecipeManager(
'test-app-user-directory',
gitManagerMock,
taskRegistryMock,
builderManagerMock,
localRepositoriesMock,
inferenceManagerMock,
);
manager.init();
return manager;
}
describe('cloneRecipe', () => {
test('error in checkout should set the task to error and propagate it', async () => {
vi.mocked(gitManagerMock.processCheckout).mockRejectedValue(new Error('clone error'));
const manager = await getInitializedRecipeManager();
await expect(() => {
return manager.cloneRecipe(recipeMock);
}).rejects.toThrowError('clone error');
expect(taskRegistryMock.updateTask).toHaveBeenCalledWith(
expect.objectContaining({
state: 'error',
}),
);
});
test('labels should be propagated', async () => {
const manager = await getInitializedRecipeManager();
await manager.cloneRecipe(recipeMock, {
'test-label': 'test-value',
});
expect(gitManagerMock.processCheckout).toHaveBeenCalledWith({
repository: recipeMock.repository,
ref: recipeMock.ref,
targetDirectory: expect.any(String),
});
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Checking out repository', 'loading', {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
git: 'checkout',
});
expect(localRepositoriesMock.register).toHaveBeenCalledWith({
path: expect.any(String),
sourcePath: expect.any(String),
labels: {
'recipe-id': recipeMock.id,
},
});
});
});
describe.each([true, false])('buildRecipe, with model is %o', withModel => {
let applicationOptions: ApplicationOptions;
beforeEach(() => {
applicationOptions = withModel
? {
connection: connectionMock,
recipe: recipeMock,
model: modelInfoMock,
}
: {
connection: connectionMock,
recipe: recipeMock,
};
});
test('error in build should propagate it', async () => {
vi.mocked(builderManagerMock.build).mockRejectedValue(new Error('build error'));
const manager = await getInitializedRecipeManager();
await expect(() => {
return manager.buildRecipe(applicationOptions);
}).rejects.toThrowError('build error');
});
test('labels should be propagated', async () => {
const manager = await getInitializedRecipeManager();
await manager.buildRecipe(applicationOptions, {
'test-label': 'test-value',
});
expect(taskRegistryMock.createTask).toHaveBeenCalledWith('Loading configuration', 'loading', {
'test-label': 'test-value',
'recipe-id': recipeMock.id,
});
expect(builderManagerMock.build).toHaveBeenCalledWith(
connectionMock,
recipeMock,
[
{
arch: ['dummy-arch'],
modelService: false,
name: 'test-container',
gpu_env: [],
contextdir: '.',
},
],
expect.any(String),
{
'test-label': 'test-value',
'recipe-id': recipeMock.id,
},
);
});
});
================================================
FILE: packages/backend/src/managers/recipes/RecipeManager.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { GitCloneInfo, GitManager } from '../gitManager';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { Recipe, RecipeComponents } from '@shared/models/IRecipe';
import path from 'node:path';
import type { Task } from '@shared/models/ITask';
import type { LocalRepositoryRegistry } from '../../registries/LocalRepositoryRegistry';
import type { AIConfig, AIConfigFile, ContainerConfig } from '../../models/AIConfig';
import { parseYamlFile } from '../../models/AIConfig';
import { existsSync, statSync } from 'node:fs';
import { goarch } from '../../utils/arch';
import type { BuilderManager } from './BuilderManager';
import type { Disposable } from '@podman-desktop/api';
import { CONFIG_FILENAME } from '../../utils/RecipeConstants';
import type { InferenceManager } from '../inference/inferenceManager';
import { withDefaultConfiguration } from '../../utils/inferenceUtils';
import type { InferenceServer } from '@shared/models/IInference';
import { type ApplicationOptions, isApplicationOptionsWithModelInference } from '../../models/ApplicationOptions';
export interface AIContainers {
aiConfigFile: AIConfigFile;
containers: ContainerConfig[];
}
export class RecipeManager implements Disposable {
constructor(
private appUserDirectory: string,
private git: GitManager,
private taskRegistry: TaskRegistry,
private builderManager: BuilderManager,
private localRepositories: LocalRepositoryRegistry,
private inferenceManager: InferenceManager,
) {}
dispose(): void {}
init(): void {}
private async doCheckout(gitCloneInfo: GitCloneInfo, labels?: { [id: string]: string }): Promise<void> {
// Creating checkout task
const checkoutTask: Task = this.taskRegistry.createTask('Checking out repository', 'loading', {
...labels,
git: 'checkout',
});
try {
await this.git.processCheckout(gitCloneInfo);
checkoutTask.state = 'success';
} catch (err: unknown) {
checkoutTask.state = 'error';
checkoutTask.error = String(err);
// propagate error
throw err;
} finally {
// Update task registry
this.taskRegistry.updateTask(checkoutTask);
}
}
public async cloneRecipe(recipe: Recipe, labels?: { [key: string]: string }): Promise<void> {
const localFolder = path.join(this.appUserDirectory, recipe.id);
// clone the recipe repository on the local folder
const gitCloneInfo: GitCloneInfo = {
repository: recipe.repository,
ref: recipe.ref,
targetDirectory: localFolder,
};
await this.doCheckout(gitCloneInfo, {
...labels,
'recipe-id': recipe.id,
});
this.localRepositories.register({
path: gitCloneInfo.targetDirectory,
sourcePath: path.join(gitCloneInfo.targetDirectory, recipe.basedir ?? ''),
labels: {
'recipe-id': recipe.id,
},
});
}
public async buildRecipe(options: ApplicationOptions, labels?: { [key: string]: string }): Promise<RecipeComponents> {
const localFolder = path.join(this.appUserDirectory, options.recipe.id);
let inferenceServer: InferenceServer | undefined;
if (isApplicationOptionsWithModelInference(options)) {
// if the recipe has a defined backend, we give priority to using an inference server
if (options.recipe.backend && options.recipe.backend === options.model.backend) {
let task: Task | undefined;
try {
inferenceServer = this.inferenceManager.findServerByModel(options.model);
task = this.taskRegistry.createTask('Starting Inference server', 'loading', labels);
if (!inferenceServer) {
const inferenceContainerId = await this.inferenceManager.createInferenceServer(
await withDefaultConfiguration({
modelsInfo: [options.model],
}),
);
inferenceServer = this.inferenceManager.get(inferenceContainerId);
this.taskRegistry.updateTask({
...task,
labels: {
...task.labels,
containerId: inferenceContainerId,
},
});
} else if (inferenceServer.status === 'stopped') {
await this.inferenceManager.startInferenceServer(inferenceServer.container.containerId);
}
task.state = 'success';
} catch (e) {
// we only skip the task update if the error is that we do not support this backend.
// If so, we build the image for the model service
if (task && String(e) !== 'no enabled provider could be found.') {
task.state = 'error';
task.error = `Something went wrong while starting the inference server: ${String(e)}`;
throw e;
}
} finally {
if (task) {
this.taskRegistry.updateTask(task);
}
}
}
}
// load and parse the recipe configuration file and filter containers based on architecture
const configAndFilteredContainers = this.getConfigAndFilterContainers(
options.recipe.basedir,
localFolder,
!!inferenceServer,
{
...labels,
'recipe-id': options.recipe.id,
},
);
const images = await this.builderManager.build(
options.connection,
options.recipe,
configAndFilteredContainers.containers,
configAndFilteredContainers.aiConfigFile.path,
{
...labels,
'recipe-id': options.recipe.id,
},
);
return {
images,
inferenceServer,
};
}
private getConfigAndFilterContainers(
recipeBaseDir: string | undefined,
localFolder: string,
useInferenceServer: boolean,
labels?: { [key: string]: string },
): AIContainers {
// Adding loading configuration task
const task = this.taskRegistry.createTask('Loading configuration', 'loading', labels);
let aiConfigFile: AIConfigFile;
try {
// load and parse the recipe configuration file
aiConfigFile = this.getConfiguration(recipeBaseDir, localFolder);
} catch (e) {
task.error = `Something went wrong while loading configuration: ${String(e)}.`;
this.taskRegistry.updateTask(task);
throw e;
}
// filter the containers based on architecture, gpu accelerator and backend (which defines which models are supported)
let filteredContainers: ContainerConfig[] = this.filterContainers(aiConfigFile.aiConfig);
// if we are using the inference server we can remove the model service
if (useInferenceServer) {
filteredContainers = filteredContainers.filter(c => !c.modelService);
}
if (filteredContainers.length > 0) {
// Mark as success.
task.state = 'success';
this.taskRegistry.updateTask(task);
} else {
// Mark as failure.
task.error = 'No containers available.';
this.taskRegistry.updateTask(task);
throw new Error('No containers available.');
}
return {
aiConfigFile: aiConfigFile,
containers: filteredContainers,
};
}
private filterContainers(aiConfig: AIConfig): ContainerConfig[] {
return aiConfig.application.containers.filter(
container => container.gpu_env.length === 0 && container.arch.some(arc => arc === goarch()),
);
}
private getConfiguration(recipeBaseDir: string | undefined, localFolder: string): AIConfigFile {
let configFile: string;
if (recipeBaseDir !== undefined) {
configFile = path.join(localFolder, recipeBaseDir, CONFIG_FILENAME);
} else {
configFile = path.join(localFolder, CONFIG_FILENAME);
}
if (!existsSync(configFile)) {
throw new Error(`The file located at ${configFile} does not exist.`);
}
// If the user configured the config as a directory we check for "ai-lab.yaml" inside.
if (statSync(configFile).isDirectory()) {
const tmpPath = path.join(configFile, CONFIG_FILENAME);
// If it has the ai-lab.yaml we use it.
if (existsSync(tmpPath)) {
configFile = tmpPath;
}
}
// Parsing the configuration
console.log(`Reading configuration from ${configFile}.`);
let aiConfig: AIConfig;
try {
aiConfig = parseYamlFile(configFile, goarch());
} catch (err) {
console.error('Cannot load configuration file.', err);
throw new Error(`Cannot load configuration file.`);
}
// Mark as success.
return {
aiConfig,
path: configFile,
};
}
}
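// Illustrative usage sketch: a typical clone-then-build flow as a caller holding a
// RecipeManager might run it. The 'trackingId' label is a hypothetical example; the
// ApplicationOptions value is expected to carry the connection, recipe and optional model.
async function cloneAndBuildSketch(manager: RecipeManager, options: ApplicationOptions): Promise<RecipeComponents> {
await manager.cloneRecipe(options.recipe, { trackingId: 'example' });
return manager.buildRecipe(options, { trackingId: 'example' });
}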
================================================
FILE: packages/backend/src/managers/snippets/java-okhttp-snippet.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { expect, test } from 'vitest';
import { javaOkHttpGenerator } from './java-okhttp-snippet';
test('expect return generated snippet', async () => {
const payload = await javaOkHttpGenerator({ url: 'http://localhost:32412/v1/chat/completions' });
expect(payload).toBeDefined();
expect(payload).toContain('.url("http://localhost:32412/v1/chat/completions")');
});
================================================
FILE: packages/backend/src/managers/snippets/java-okhttp-snippet.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RequestOptions } from '@shared/models/RequestOptions';
import mustache from 'mustache';
import javaOkHttpTemplate from '../../templates/java-okhttp.mustache?raw';
export async function javaOkHttpGenerator(requestOptions: RequestOptions): Promise<string> {
if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator');
return mustache.render(javaOkHttpTemplate, {
endpoint: requestOptions.url,
});
}
================================================
FILE: packages/backend/src/managers/snippets/python-langchain-snippet.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { expect, test } from 'vitest';
import { pythonLangChainGenerator } from './python-langchain-snippet';
test('expect return generated snippet', async () => {
const payload = await pythonLangChainGenerator({ url: 'http://localhost:32412/v1/chat/completions' });
expect(payload).toBeDefined();
expect(payload).toContain('model_service = "http://localhost:32412/v1/"');
});
================================================
FILE: packages/backend/src/managers/snippets/python-langchain-snippet.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RequestOptions } from '@shared/models/RequestOptions';
import mustache from 'mustache';
import pythonLangChainTemplate from '../../templates/python-langchain.mustache?raw';
export async function pythonLangChainGenerator(requestOptions: RequestOptions): Promise<string> {
if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator');
return mustache.render(pythonLangChainTemplate, {
endpoint: requestOptions.url.replace('chat/completions', ''),
});
}
================================================
FILE: packages/backend/src/managers/snippets/quarkus-snippet.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { quarkusLangchain4Jgenerator } from './quarkus-snippet';
beforeEach(() => {
vi.resetAllMocks();
});
test('expect fetched version in generated payload', async () => {
const oldFetch = global.fetch;
try {
global.fetch = vi.fn().mockResolvedValue({
text: () =>
Promise.resolve(
'<metadata><groupId>io.quarkiverse.langchain4j</groupId><artifactId>quarkus-langchain4j-core</artifactId><versioning><latest>latest-version</latest><release>release-version</release></versioning></metadata>',
),
});
const payload = await quarkusLangchain4Jgenerator({ url: 'http://localhost:32412/v1/chat/completions' });
expect(payload).toBeDefined();
expect(payload).toContain('release-version');
} finally {
global.fetch = oldFetch;
}
});
================================================
FILE: packages/backend/src/managers/snippets/quarkus-snippet.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RequestOptions } from '@shared/models/RequestOptions';
import mustache from 'mustache';
import template from '../../templates/quarkus-langchain4j.mustache?raw';
import xmljs from 'xml-js';
const SUFFIX_LENGTH = '/chat/completions'.length;
const METADATA_URL =
'https://repo1.maven.org/maven2/io/quarkiverse/langchain4j/quarkus-langchain4j-core/maven-metadata.xml';
let quarkusLangchain4jVersion: string;
async function getQuarkusLangchain4jVersion(): Promise<string> {
if (quarkusLangchain4jVersion) {
return quarkusLangchain4jVersion;
}
const response = await fetch(METADATA_URL, { redirect: 'follow' });
const content = JSON.parse(xmljs.xml2json(await response.text(), { compact: true }));
// eslint-disable-next-line sonarjs/no-nested-assignment
return (quarkusLangchain4jVersion = content.metadata.versioning.release._text);
}
export async function quarkusLangchain4Jgenerator(requestOptions: RequestOptions): Promise<string> {
if (!requestOptions.url.endsWith('/v1/chat/completions')) throw new Error('Incompatible generator');
return mustache.render(template, {
baseUrl: requestOptions.url.substring(0, requestOptions.url.length - SUFFIX_LENGTH),
version: await getQuarkusLangchain4jVersion(),
});
}
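// Illustrative usage sketch: the snippet generators in this folder share the same
// contract, taking a RequestOptions that points at a /v1/chat/completions endpoint
// and resolving to rendered source text. The port below is a hypothetical example.
async function renderQuarkusSnippetSketch(): Promise<string> {
return quarkusLangchain4Jgenerator({ url: 'http://localhost:45678/v1/chat/completions' });
}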
================================================
FILE: packages/backend/src/models/AIConfig.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { expect, test, describe, vi } from 'vitest';
import fs from 'node:fs';
import { type AIConfig, AIConfigFormat, parseYamlFile } from './AIConfig';
// Define mock file paths and contents
const mockYamlPath = '/path/to/mock.yml';
const defaultArch = 'x64';
const readFileSync = vi.spyOn(fs, 'readFileSync');
describe('parseYaml', () => {
test('malformed configuration', () => {
readFileSync.mockReturnValue(``);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('malformed configuration file.');
});
test('missing application property', () => {
readFileSync.mockReturnValue(`
wrong:
`);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('malformed configuration file: missing version');
});
test('version mismatch', () => {
readFileSync.mockReturnValue(`
version: unknown
application: true
`);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('malformed configuration file: version not supported, got unknown expected v1.0.');
});
test('application primitive', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application: true
`);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('AIConfig has bad formatting: application does not have valid container property');
});
test('containers not an array', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
name: container1
contextdir: /path/to/dir1
arch: ["x86"]
model-service: true
gpu-env: ["env1", "env2"]
ports: [ 8080 ]
`);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('AIConfig has bad formatting: containers property must be an array.');
});
test('containers object', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers: true
`);
expect(() => {
parseYamlFile(mockYamlPath, defaultArch);
}).toThrowError('AIConfig has bad formatting: containers property must be an array.');
});
test('should use architecture as string', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
arch: x86
ports: [ 8080 ]
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: ['x86'],
gpu_env: [],
modelService: false,
ports: [8080],
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
test('should use all architectures', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
arch: ['arch1', 'arch2']
ports: [ 8080 ]
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: ['arch1', 'arch2'],
gpu_env: [],
modelService: false,
ports: [8080],
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
test('should put the default architecture', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
ports: [ 8080 ]
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: [defaultArch],
gpu_env: [],
modelService: false,
ports: [8080],
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
test('should use the image provided in the config', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
ports: [ 8080 ]
image: dummy-image
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: [defaultArch],
gpu_env: [],
modelService: false,
ports: [8080],
image: 'dummy-image',
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
test('ports should always be a final number', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
ports: [ '8080', 8888 ]
image: dummy-image
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: [defaultArch],
gpu_env: [],
modelService: false,
ports: [8080, 8888],
image: 'dummy-image',
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
test('should use gpu env', () => {
readFileSync.mockReturnValue(`
version: ${AIConfigFormat.CURRENT}
application:
containers:
- name: container1
contextdir: /path/to/dir1
arch: ["x86"]
model-service: true
gpu-env: ["env1", "env2"]
ports: [ 8080 ]
- name: container2
arch: ["arm"]
ports: [ 8001 ]
`);
const expectedConfig: AIConfig = {
version: AIConfigFormat.CURRENT,
application: {
containers: [
{
name: 'container1',
contextdir: '/path/to/dir1',
arch: ['x86'],
modelService: true,
gpu_env: ['env1', 'env2'],
ports: [8080],
},
{
name: 'container2',
contextdir: '.',
arch: ['arm'],
modelService: false,
gpu_env: [],
ports: [8001],
},
],
},
};
expect(parseYamlFile(mockYamlPath, defaultArch)).toEqual(expectedConfig);
});
});
================================================
FILE: packages/backend/src/models/AIConfig.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import * as jsYaml from 'js-yaml';
import fs from 'node:fs';
export interface ContainerConfig {
name: string;
contextdir: string;
containerfile?: string;
arch: string[];
modelService: boolean;
gpu_env: string[];
ports?: number[];
image?: string;
backend?: string[];
}
export enum AIConfigFormat {
CURRENT = 'v1.0',
}
export interface AIConfig {
version: AIConfigFormat;
application: {
containers: ContainerConfig[];
};
}
export interface AIConfigFile {
aiConfig: AIConfig;
path: string;
}
export function isString(value: unknown): value is string {
return (!!value && typeof value === 'string') || value instanceof String;
}
export function assertString(value: unknown): string {
if (isString(value)) return value;
throw new Error('value not a string');
}
export function parseYamlFile(filepath: string, defaultArch: string): AIConfig {
const raw: string = fs.readFileSync(filepath, 'utf-8');
const aiLabConfig: unknown = jsYaml.load(raw);
if (!aiLabConfig || typeof aiLabConfig !== 'object') {
throw new Error('malformed configuration file.');
}
if (!('version' in aiLabConfig) || typeof aiLabConfig.version !== 'string')
throw new Error('malformed configuration file: missing version');
if (aiLabConfig.version !== AIConfigFormat.CURRENT)
throw new Error(
`malformed configuration file: version not supported, got ${aiLabConfig.version} expected ${AIConfigFormat.CURRENT}.`,
);
if (!('application' in aiLabConfig)) {
throw new Error('malformed configuration file: missing application property');
}
const application: unknown = aiLabConfig['application'];
if (!application || typeof application !== 'object' || !('containers' in application)) {
throw new Error('AIConfig has bad formatting: application does not have valid container property');
}
if (!Array.isArray(application['containers'])) {
throw new Error('AIConfig has bad formatting: containers property must be an array.');
}
const containers: unknown[] = application['containers'];
return {
version: AIConfigFormat.CURRENT,
application: {
containers: containers.map(container => {
if (!container || typeof container !== 'object') throw new Error('containers array malformed');
let contextdir: string;
if ('contextdir' in container) {
contextdir = assertString(container['contextdir']);
} else {
contextdir = '.';
}
const architectures: string[] = [];
if (!('arch' in container)) {
architectures.push(defaultArch);
} else if (Array.isArray(container['arch']) && container['arch'].every(arch => typeof arch === 'string')) {
architectures.push(...container['arch']);
} else if (typeof container['arch'] === 'string') {
architectures.push(container['arch']);
} else {
throw new Error('malformed arch property');
}
let containerfile: string | undefined = undefined;
if ('containerfile' in container && isString(container['containerfile'])) {
containerfile = container['containerfile'];
}
if (!('name' in container) || typeof container['name'] !== 'string') {
throw new Error('invalid name property: must be string');
}
return {
arch: architectures,
modelService: 'model-service' in container && container['model-service'] === true,
containerfile,
contextdir: contextdir,
name: container['name'],
gpu_env: 'gpu-env' in container && Array.isArray(container['gpu-env']) ? container['gpu-env'] : [],
ports:
'ports' in container && Array.isArray(container['ports'])
? container['ports'].map(port => parseInt(port))
: [],
image: 'image' in container && isString(container['image']) ? container['image'] : undefined,
backend: 'backend' in container && Array.isArray(container['backend']) ? container['backend'] : undefined,
};
}),
},
};
}
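// Illustrative sketch of the minimal shape parseYamlFile accepts, based on the checks
// above; the file path and container values are hypothetical examples.
//
// version: v1.0
// application:
//   containers:
//     - name: app
//       contextdir: ./app
//       arch: ["amd64"]
//       model-service: false
//       gpu-env: []
//       ports: [8501]
//
function parseExampleConfigSketch(): AIConfig {
return parseYamlFile('/path/to/ai-lab.yaml', 'amd64');
}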
================================================
FILE: packages/backend/src/models/ApplicationOptions.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { ContainerProviderConnection } from '@podman-desktop/api';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { Recipe, RecipeDependencies } from '@shared/models/IRecipe';
export type ApplicationOptions = ApplicationOptionsDefault | ApplicationOptionsWithModelInference;
export interface ApplicationOptionsDefault {
connection: ContainerProviderConnection;
recipe: Recipe;
dependencies?: RecipeDependencies;
}
export type ApplicationOptionsWithModelInference = ApplicationOptionsDefault & {
model: ModelInfo;
};
export function isApplicationOptionsWithModelInference(
options: ApplicationOptions,
): options is ApplicationOptionsWithModelInference {
return 'model' in options;
}
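// Illustrative sketch: narrowing ApplicationOptions with the type guard above.
// options.model only becomes accessible once the guard has narrowed the union.
function describeOptionsSketch(options: ApplicationOptions): string {
if (isApplicationOptionsWithModelInference(options)) {
return `${options.recipe.name} with model ${options.model.name}`;
}
return options.recipe.name;
}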
================================================
FILE: packages/backend/src/models/HuggingFaceModelHandler.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { EventEmitter } from '@podman-desktop/api';
import type { TelemetryLogger } from '@podman-desktop/api';
import { beforeEach, expect, test, vi } from 'vitest';
import { ModelsManager } from '../managers/modelsManager';
import type { CatalogManager } from '../managers/catalogManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import type { PodmanConnection } from '../managers/podmanConnection';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { ModelHandlerRegistry } from '../registries/ModelHandlerRegistry';
import { HuggingFaceModelHandler } from './HuggingFaceModelHandler';
import { snapshotDownload } from '@huggingface/hub';
import type { RpcExtension } from '@shared/messages/MessageProxy';
vi.mock('@podman-desktop/api', () => {
return {
EventEmitter: vi.fn(),
};
});
vi.mock('@huggingface/hub', () => {
return {
scanCacheDir: vi.fn(),
snapshotDownload: vi.fn(),
};
});
const rpcExtensionMock = {
fire: vi.fn(),
} as unknown as RpcExtension;
const catalogManagerMock = {
getModels(): ModelInfo[] {
return [
{ id: 'model-id-1', name: 'model-id-1-model' } as ModelInfo,
{ id: 'model-id-2', name: 'model-id-2-model' } as ModelInfo,
];
},
onUpdate: vi.fn(),
} as unknown as CatalogManager;
const telemetryLogger = {
logUsage: vi.fn(),
logError: vi.fn(),
} as unknown as TelemetryLogger;
const taskRegistry: TaskRegistry = new TaskRegistry(rpcExtensionMock);
const cancellationTokenRegistryMock = {
createCancellationTokenSource: vi.fn(),
} as unknown as CancellationTokenRegistry;
const podmanConnectionMock = {
getContainerProviderConnections: vi.fn(),
} as unknown as PodmanConnection;
const configurationRegistryMock = {
getExtensionConfiguration: vi.fn(),
} as unknown as ConfigurationRegistry;
const modelHandlerRegistry = new ModelHandlerRegistry(rpcExtensionMock);
const modelsManager: ModelsManager = new ModelsManager(
rpcExtensionMock,
catalogManagerMock,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
podmanConnectionMock,
configurationRegistryMock,
modelHandlerRegistry,
);
const huggingFaceModelHandler = new HuggingFaceModelHandler(modelsManager);
beforeEach(() => {
const listeners: ((value: unknown) => void)[] = [];
const eventReturned = {
event: vi.fn(),
fire: vi.fn(),
};
vi.mocked(EventEmitter).mockReturnValue(eventReturned as unknown as EventEmitter<unknown>);
vi.mocked(eventReturned.event).mockImplementation(callback => {
listeners.push(callback);
});
vi.mocked(eventReturned.fire).mockImplementation((content: unknown) => {
listeners.forEach(listener => listener(content));
});
});
test('check http url are not supported', () => {
expect(huggingFaceModelHandler.accept('http://example.com')).toBe(false);
});
test('check https url are not supported', () => {
expect(huggingFaceModelHandler.accept('https://example.com')).toBe(false);
});
test('check huggingface url are supported', () => {
expect(huggingFaceModelHandler.accept('huggingface://ibm-granite/my-model')).toBe(true);
});
test('download reports error', async () => {
vi.mocked(snapshotDownload).mockRejectedValue(new Error('error'));
const listenerMock = vi.fn();
const downloader = huggingFaceModelHandler.createDownloader(
{ id: 'model-id-1', name: 'model-id-1-model', url: 'huggingface://ibm-granite/my-model' } as ModelInfo,
{ aborted: false } as AbortSignal,
);
downloader.onEvent(listenerMock);
let err: unknown;
try {
await downloader.perform('model-id-1');
} catch (error) {
err = error;
}
expect(err).toBeDefined();
expect(listenerMock).toHaveBeenCalledWith({
id: 'model-id-1',
message: 'Something went wrong: Error: error.',
status: 'error',
});
});
test('download returns cache in path', async () => {
vi.mocked(snapshotDownload).mockResolvedValue('cache-path');
const listenerMock = vi.fn();
const downloader = huggingFaceModelHandler.createDownloader(
{ id: 'model-id-1', name: 'model-id-1-model', url: 'huggingface://ibm-granite/my-model' } as ModelInfo,
{ aborted: false } as AbortSignal,
);
downloader.onEvent(listenerMock);
await downloader.perform('model-id-1');
expect(downloader.getTarget()).toBe('cache-path');
expect(listenerMock).toHaveBeenCalledWith({
duration: expect.anything(),
id: 'model-id-1',
message: expect.anything(),
status: 'completed',
});
});
================================================
FILE: packages/backend/src/models/HuggingFaceModelHandler.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { ModelHandler } from './ModelHandler';
import type { ModelInfo } from '@shared/models/IModelInfo';
import { Downloader } from '../utils/downloader';
import { scanCacheDir, snapshotDownload } from '@huggingface/hub';
import type { CompletionEvent } from './baseEvent';
import { getDurationSecondsSince } from '../utils/utils';
import type { ModelsManager } from '../managers/modelsManager';
import fs from 'node:fs/promises';
function parseURL(url: string): { repo: string; revision?: string } | undefined {
const u = URL.parse(url);
if (u) {
return { repo: u.pathname.slice(1), revision: u.searchParams.get('revision') ?? 'main' };
}
return undefined;
}
class HuggingFaceDownloader extends Downloader {
#target: string = '';
constructor(
url: string,
private repo: string,
private revision: string | undefined,
private abortSignal: AbortSignal,
) {
super(url, '');
}
override getTarget(): string {
return this.#target;
}
async perform(id: string): Promise<void> {
const startTime = performance.now();
try {
this.#target = await snapshotDownload({
repo: this.repo,
revision: this.revision,
});
const durationSeconds = getDurationSecondsSince(startTime);
this._onEvent.fire({
id: id,
status: 'completed',
message: `Duration ${durationSeconds}s.`,
duration: durationSeconds,
} as CompletionEvent);
} catch (err: unknown) {
if (!this.abortSignal?.aborted) {
this._onEvent.fire({
id: id,
status: 'error',
message: `Something went wrong: ${String(err)}.`,
});
} else {
this._onEvent.fire({
id: id,
status: 'canceled',
message: `Request cancelled: ${String(err)}.`,
});
}
throw err;
} finally {
this.completed = true;
}
}
}
export class HuggingFaceModelHandler extends ModelHandler {
constructor(modelsManager: ModelsManager) {
super('huggingface model registry', modelsManager);
}
accept(url: string): boolean {
return url.startsWith('huggingface') || url.startsWith('hf');
}
createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader {
const result = parseURL(model.url!);
if (result) {
return new HuggingFaceDownloader(model.url!, result.repo, result.revision, abortSignal);
}
throw new Error(`Invalid URL: ${model.url} for model ${model.name}`);
}
async deleteModel(model: ModelInfo): Promise<void> {
if (model.file) {
await fs.rm(model.file?.path, { recursive: true });
} else {
throw new Error(`Model ${model.name} not downloaded yet.`);
}
}
dispose(): void {}
async getLocalModelsFromDisk(): Promise<void> {
const hfModels = this.modelsManager
.getModelsInfo()
.filter(model => model.url && this.accept(model.url))
.map(model => {
return { model: model, repo: parseURL(model.url!) };
})
.filter(info => info.repo);
scanCacheDir()
.then(hfinfo => {
for (const repo of hfinfo.repos) {
for (const revision of repo.revisions) {
for (const ref of revision.refs) {
const model = hfModels.find(m => m.repo?.repo === repo.id.name && m.repo?.revision === ref);
if (model) {
model.model.file = {
path: revision.path,
file: '',
creation: revision.lastModifiedAt,
size: revision.size,
};
}
}
}
}
})
.catch((err: unknown): void => {
console.error('Something went wrong while scanning cache.', err);
});
}
}
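// Illustrative sketch: feeding a catalog URL through parseURL before handing the
// resulting repo/revision pair to the downloader above. The URL is a hypothetical example.
function parseHuggingFaceUrlSketch(): { repo: string; revision?: string } | undefined {
return parseURL('huggingface://ibm-granite/my-model?revision=main');
}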
================================================
FILE: packages/backend/src/models/ModelHandler.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { Disposable } from '@podman-desktop/api';
import { EventEmitter } from '@podman-desktop/api';
import type { Downloader } from '../utils/downloader';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { ModelsManager } from '../managers/modelsManager';
export abstract class ModelHandler implements Disposable {
readonly name: string;
readonly modelsManager: ModelsManager;
protected _onUpdate = new EventEmitter<void>();
readonly onUpdate = this._onUpdate.event;
protected constructor(name: string, modelsManager: ModelsManager) {
this.name = name;
this.modelsManager = modelsManager;
}
/**
* Releases any resources held by the model handler.
*/
abstract dispose(): void;
/**
* Returns true if the model handler can handle the given URL.
* @param url
*/
abstract accept(url: string): boolean;
/**
* Creates a downloader for the given model.
* @param model the model to download
* @param abortSignal the signal to abort the download
*/
abstract createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader;
/**
* Retrieves the local models from disk.
*/
abstract getLocalModelsFromDisk(): Promise<void>;
/**
* Deletes the given model from local storage.
* @param model the model
*/
abstract deleteModel(model: ModelInfo): Promise<void>;
}
================================================
FILE: packages/backend/src/models/TaskRunner.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
export interface RunAsTaskOptions {
loadingLabel: string;
// label set when the task terminates normally, by default the loading label is kept
successLabel?: string;
// label set when the task terminates in error, by default the loading label is kept
errorLabel?: string;
// the error message to display when task terminates in error
errorMsg: (err: unknown) => string;
// if true, all subtasks (tasks found with the same labels) will be immediately marked in error if this task fails
failFastSubtasks?: boolean;
}
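// Helpers made available to a running task: updateLabels applies a transform to the
// labels of the task being executed.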
export interface TaskRunnerTools {
updateLabels: (f: (labels: Record<string, string>) => Record<string, string>) => void;
}
================================================
FILE: packages/backend/src/models/URLModelHandler.ts
================================================
/**********************************************************************
* Copyright (C) 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import fs from 'node:fs';
import { basename, join, resolve } from 'node:path';
import type { FileSystemWatcher } from '@podman-desktop/api';
import { fs as apiFs } from '@podman-desktop/api';
import { ModelHandler } from './ModelHandler';
import type { ModelsManager } from '../managers/modelsManager';
import type { ModelInfo } from '@shared/models/IModelInfo';
import type { Downloader } from '../utils/downloader';
import { URLDownloader } from '../utils/urldownloader';
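/**
 * Handles models referenced by plain http(s) or file URLs: each model is downloaded
 * into its own folder under the models directory, which is watched so local model
 * state stays up to date.
 */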
export class URLModelHandler extends ModelHandler {
#watcher: FileSystemWatcher;
constructor(
modelsManager: ModelsManager,
private modelsDir: string,
) {
super('url model registry', modelsManager);
this.#watcher = apiFs.createFileSystemWatcher(this.modelsDir);
this.#watcher.onDidCreate(() => this._onUpdate.fire());
this.#watcher.onDidDelete(() => this._onUpdate.fire());
this.#watcher.onDidChange(() => this._onUpdate.fire());
}
override dispose(): void {
this.#watcher.dispose();
}
override accept(url: string): boolean {
return url.startsWith('https') || url.startsWith('http') || url.startsWith('file');
}
override createDownloader(model: ModelInfo, abortSignal: AbortSignal): Downloader {
const destDir = join(this.modelsDir, model.id);
const target = resolve(destDir, basename(model.url!));
return new URLDownloader(model.url!, target, model.sha256, abortSignal);
}
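/**
 * Scans the models directory (one sub-folder per model id, each expected to contain a
 * single model file) and attaches the file information to the matching ModelInfo.
 */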
override async getLocalModelsFromDisk(): Promise<void> {
if (!fs.existsSync(this.modelsDir)) {
return;
}
const entries = await fs.promises.readdir(this.modelsDir, { withFileTypes: true });
const dirs = entries.filter(dir => dir.isDirectory());
for (const d of dirs) {
const modelEntries = await fs.promises.readdir(resolve(d.parentPath, d.name));
if (modelEntries.length !== 1) {
// we support models with one file only for now
continue;
}
const modelFile = modelEntries[0];
const fullPath = resolve(d.parentPath, d.name, modelFile);
// Check for corresponding models or tmp file that should be ignored
try {
const model = this.modelsManager.getModelInfo(d.name);
if (fullPath.endsWith('.tmp')) {
continue;
}
let info: { size?: number; mtime?: Date } = { size: undefined, mtime: undefined };
try {
info = await fs.promises.stat(fullPath);
} catch (err: unknown) {
console.error('Something went wrong while getting file stats (probably in use).', err);
}
model.file = {
file: modelFile,
path: resolve(d.parentPath, d.name),
size: info.size,
creation: info.mtime,
};
} catch (e: unknown) {
console.warn(`Can't find model info for local folder ${d.name}.`, e);
}
}
}
async deleteModel(model: ModelInfo): Promise<void> {
const folder = resolve(this.modelsDir, model.id);
await fs.promises.rm(folder, { recursive: true, force: true, maxRetries: 3 });
}
}
================================================
FILE: packages/backend/src/models/baseEvent.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
export interface BaseEvent {
id: string;
status: 'error' | 'completed' | 'progress' | 'canceled';
message?: string;
}
export interface CompletionEvent extends BaseEvent {
status: 'completed' | 'error' | 'canceled';
duration: number;
}
export interface ProgressEvent extends BaseEvent {
status: 'progress';
value: number;
total: number;
}
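// Type guards used to narrow download events by their status field.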
export const isCompletionEvent = (value: unknown): value is CompletionEvent => {
return (
!!value &&
typeof value === 'object' &&
'status' in value &&
typeof value['status'] === 'string' &&
['canceled', 'completed', 'error'].includes(value['status'])
);
};
export const isProgressEvent = (value: unknown): value is ProgressEvent => {
return (
!!value && typeof value === 'object' && 'status' in value && value['status'] === 'progress' && 'value' in value
);
};
================================================
FILE: packages/backend/src/registries/ApplicationRegistry.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import type { RecipeModelIndex } from '@shared/models/IRecipeModelIndex';
export class ApplicationRegistry<T extends RecipeModelIndex> {
#applications = new Map<string, T>();
keys(): RecipeModelIndex[] {
return Array.from(this.#applications.values()).map(a => ({ recipeId: a.recipeId, modelId: a.modelId }));
}
has(recipeModel: RecipeModelIndex): boolean {
return this.#applications.has(this.hash(recipeModel));
}
delete(recipeModel: RecipeModelIndex): boolean {
return this.#applications.delete(this.hash(recipeModel));
}
values(): IterableIterator<T> {
return this.#applications.values();
}
get(recipeModel: RecipeModelIndex): T {
const application = this.#applications.get(this.hash(recipeModel));
if (!application) throw new Error('application not found.');
return application;
}
set(recipeModel: RecipeModelIndex, value: T): void {
this.#applications.set(this.hash(recipeModel), value);
}
clear(): void {
this.#applications.clear();
}
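// Composite key so that the registry holds one entry per (recipe, model) pair.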
private hash(recipeModel: RecipeModelIndex): string {
return recipeModel.recipeId + recipeModel.modelId;
}
}
================================================
FILE: packages/backend/src/registries/CancellationTokenRegistry.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { CancellationTokenRegistry } from './CancellationTokenRegistry';
import { CancellationTokenSource, EventEmitter } from '@podman-desktop/api';
vi.mock('@podman-desktop/api', async () => {
return {
EventEmitter: vi.fn(),
CancellationTokenSource: vi.fn(),
};
});
beforeEach(() => {
vi.resetAllMocks();
// mock event emitters
const listeners: ((value: unknown) => void)[] = [];
vi.mocked(EventEmitter).mockReturnValue({
event: vi.fn().mockImplementation(callback => {
listeners.push(callback);
}),
dispose: vi.fn(),
fire: vi.fn().mockImplementation((content: unknown) => {
listeners.forEach(listener => listener(content));
}),
} as unknown as EventEmitter<unknown>);
vi.mocked(CancellationTokenSource).mockReturnValue({
cancel: vi.fn(),
dispose: vi.fn(),
token: {
isCancellationRequested: false,
onCancellationRequested: vi.fn(),
},
});
});
test('created token should be retrievable', () => {
const registry = new CancellationTokenRegistry();
const tokenId = registry.createCancellationTokenSource();
expect(tokenId).toBeDefined();
expect(registry.hasCancellationTokenSource(tokenId)).toBeTruthy();
});
test('created token should not be cancelled', () => {
const registry = new CancellationTokenRegistry();
const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource());
expect(source).toBeDefined();
expect(source?.token.isCancellationRequested).toBeFalsy();
});
test('cancel token should be removed from registry', () => {
const registry = new CancellationTokenRegistry();
const tokenId = registry.createCancellationTokenSource();
expect(registry.hasCancellationTokenSource(tokenId)).toBeTruthy();
registry.cancel(tokenId);
expect(registry.hasCancellationTokenSource(tokenId)).toBeFalsy();
});
test('disposing registry should dispose with cancel all tokens', () => {
const registry = new CancellationTokenRegistry();
const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource());
registry.dispose();
expect(source?.cancel).toHaveBeenCalled();
expect(source?.dispose).toHaveBeenCalled();
});
test('creating cancellation token with function should register it', () => {
const registry = new CancellationTokenRegistry();
const func = vi.fn();
const source = registry.getCancellationTokenSource(registry.createCancellationTokenSource(func));
expect(source?.token.onCancellationRequested).toHaveBeenCalledWith(func);
});
================================================
FILE: packages/backend/src/registries/CancellationTokenRegistry.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { CancellationTokenSource, type Disposable } from '@podman-desktop/api';
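/**
 * Keeps CancellationTokenSource instances indexed by a numeric id so that they can be
 * created, looked up, cancelled and disposed individually.
 */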
export class CancellationTokenRegistry implements Disposable {
#callbackId: number;
#callbacksCancellableToken: Map<number, CancellationTokenSource>;
constructor() {
this.#callbackId = 0;
this.#callbacksCancellableToken = new Map();
}
/**
* Creating a cancellation token.
* @param func an optional function that will be called when the cancel action will be triggered
*/
createCancellationTokenSource(func?: () => void): number {
// keep track of this request
this.#callbackId++;
const token = new CancellationTokenSource();
if (func !== undefined) {
token.token.onCancellationRequested(func);
}
// store the callback that will resolve the promise
this.#callbacksCancellableToken.set(this.#callbackId, token);
return this.#callbackId;
}
getCancellationTokenSource(id: number): CancellationTokenSource | undefined {
if (this.hasCancellationTokenSource(id)) {
return this.#callbacksCancellableToken.get(id);
}
return undefined;
}
hasCancellationTokenSource(id: number): boolean {
return this.#callbacksCancellableToken.has(id);
}
cancel(tokenId: number): void {
if (!this.hasCancellationTokenSource(tokenId))
throw new Error(`Cancellation token with id ${tokenId} does not exist.`);
this.getCancellationTokenSource(tokenId)?.cancel();
this.delete(tokenId);
}
delete(tokenId: number): void {
this.#callbacksCancellableToken.delete(tokenId);
}
dispose(): void {
Array.from(this.#callbacksCancellableToken.values()).forEach(source => {
source.cancel();
source.dispose();
});
this.#callbacksCancellableToken.clear();
}
}
================================================
FILE: packages/backend/src/registries/ConfigurationRegistry.spec.ts
================================================
/**********************************************************************
* Copyright (C) 2024-2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { vi, expect, test } from 'vitest';
import { configuration, type Configuration } from '@podman-desktop/api';
import { ConfigurationRegistry } from './ConfigurationRegistry';
import type { RpcExtension } from '@shared/messages/MessageProxy';
const fakeConfiguration = {
get: vi.fn(),
has: vi.fn(),
update: vi.fn(),
} as unknown as Configuration;
const rpcExtensionMock = {
fire: vi.fn().mockResolvedValue(true),
} as unknown as RpcExtension;
vi.mock('@podman-desktop/api', async () => {
return {
configuration: {
getConfiguration: (): unknown => fakeConfiguration,
onDidChangeConfiguration: vi.fn(),
},
};
});
test('init should init listener', () => {
const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
vi.mocked(fakeConfiguration.has).mockReturnValue(true);
registry.init();
expect(configuration.onDidChangeConfiguration).toHaveBeenCalled();
});
test('dispose should dispose listener', () => {
const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
vi.mocked(fakeConfiguration.has).mockReturnValue(true);
const disposeMock = vi.fn();
vi.mocked(configuration.onDidChangeConfiguration).mockReturnValue({ dispose: disposeMock });
registry.init();
expect(configuration.onDidChangeConfiguration).toHaveBeenCalled();
registry.dispose();
expect(disposeMock).toHaveBeenCalled();
});
test('update should trigger configuration update', async () => {
const registry = new ConfigurationRegistry(rpcExtensionMock, 'appdir');
vi.mocked(fakeConfiguration.has).mockReturnValue(true);
vi.mocked(fakeConfiguration.update).mockResolvedValue(undefined);
registry.init();
await registry.updateExtensionConfiguration({ modelsPath: '' });
expect(fakeConfiguration.update).toHaveBeenCalledWith('models.path', '');
});
================================================
FILE: packages/backend/src/registries/ConfigurationRegistry.ts
================================================
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { configuration, version, type Configuration, type Disposable } from '@podman-desktop/api';
import { Publisher } from '../utils/Publisher';
import type { ExtensionConfiguration } from '@shared/models/IExtensionConfiguration';
import { MSG_CONFIGURATION_UPDATE } from '@shared/Messages';
import path from 'node:path';
import type { RpcExtension } from '@shared/messages/MessageProxy';
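// Settings keys of the `ai-lab` configuration section handled by this registry.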
const CONFIGURATION_SECTIONS: string[] = [
'models.path',
'experimentalGPU',
'apiPort',
'inferenceRuntime',
'experimentalTuning',
'modelUploadDisabled',
'showGPUPromotion',
'appearance',
];
const API_PORT_DEFAULT = 10434;
export class ConfigurationRegistry extends Publisher<ExtensionConfiguration> implements Disposable {
#configuration: Configuration;
#configurationPodmanDesktop: Configuration;
#configurationDisposable: Disposable | undefined;
constructor(
rpcExtension: RpcExtension,
private appUserDirectory: string,
) {
super(rpcExtension, MSG_CONFIGURATION_UPDATE, () => this.getExtensionConfiguration());
this.#configuration = configuration.getConfiguration('ai-lab');
this.#configurationPodmanDesktop = configuration.getConfiguration('preferences');
}
getExtensionConfiguration(): ExtensionConfiguration {
return {
modelsPath: this.getModelsPath(),
experimentalGPU: this.#configuration.get<boolean>('experimentalGPU') ?? false,
apiPort: this.#configuration.get<number>('apiPort') ?? API_PORT_DEFAULT,
inferenceRuntime: this.#configuration.get<string>('inferenceRuntime') ?? 'all',
experimentalTuning: this.#configuration.get<boolean>('experimentalTuning') ?? false,
modelUploadDisabled: this.#configuration.get<boolean>('modelUploadDisabled') ?? false,
showGPUPromotion: this.#configuration.get